{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.979883566776183, "eval_steps": 1000, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.2900763358778623e-09, "logits/chosen": -3.078238010406494, "logits/rejected": -3.036238670349121, "logps/chosen": -119.78216552734375, "logps/rejected": -81.49699401855469, "loss": 0.6813, "rewards/accuracies": 0.375, "rewards/chosen": -0.07474613189697266, "rewards/margins": 0.049993135035037994, "rewards/rejected": -0.12473927438259125, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.580152671755725e-09, "logits/chosen": -3.19315242767334, "logits/rejected": -3.0488533973693848, "logps/chosen": -379.8439636230469, "logps/rejected": -246.86712646484375, "loss": 0.7741, "rewards/accuracies": 0.125, "rewards/chosen": -0.25631868839263916, "rewards/margins": -0.1461872160434723, "rewards/rejected": -0.11013145744800568, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.870229007633587e-09, "logits/chosen": -3.103341579437256, "logits/rejected": -3.2139434814453125, "logps/chosen": -155.2957763671875, "logps/rejected": -280.8891296386719, "loss": 0.5763, "rewards/accuracies": 0.5, "rewards/chosen": 0.11268434673547745, "rewards/margins": 0.2866156995296478, "rewards/rejected": -0.17393136024475098, "step": 3 }, { "epoch": 0.0, "learning_rate": 9.16030534351145e-09, "logits/chosen": -2.8749852180480957, "logits/rejected": -2.8633487224578857, "logps/chosen": -414.8719787597656, "logps/rejected": -280.2159118652344, "loss": 0.9265, "rewards/accuracies": 0.25, "rewards/chosen": -0.3218037188053131, "rewards/margins": -0.36112719774246216, "rewards/rejected": 0.03932347521185875, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.1450381679389314e-08, "logits/chosen": -3.214733839035034, "logits/rejected": -3.014448642730713, "logps/chosen": -287.48492431640625, "logps/rejected": -319.5427551269531, "loss": 0.634, "rewards/accuracies": 0.5, "rewards/chosen": 0.0066321855410933495, "rewards/margins": 0.18307408690452576, "rewards/rejected": -0.17644190788269043, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.3740458015267175e-08, "logits/chosen": -3.198916435241699, "logits/rejected": -3.1198673248291016, "logps/chosen": -306.1859436035156, "logps/rejected": -256.5517578125, "loss": 0.7132, "rewards/accuracies": 0.5, "rewards/chosen": -0.3292255699634552, "rewards/margins": 0.04297979548573494, "rewards/rejected": -0.37220534682273865, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.6030534351145036e-08, "logits/chosen": -3.576998472213745, "logits/rejected": -3.4146039485931396, "logps/chosen": -218.9892578125, "logps/rejected": -149.21798706054688, "loss": 0.8367, "rewards/accuracies": 0.625, "rewards/chosen": -0.24793888628482819, "rewards/margins": -0.1352853775024414, "rewards/rejected": -0.11265352368354797, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.83206106870229e-08, "logits/chosen": -3.1488728523254395, "logits/rejected": -3.140510082244873, "logps/chosen": -531.25732421875, "logps/rejected": -244.979736328125, "loss": 0.852, "rewards/accuracies": 0.375, "rewards/chosen": -0.35448914766311646, "rewards/margins": -0.252580851316452, "rewards/rejected": -0.10190828144550323, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.0610687022900764e-08, "logits/chosen": -3.243004083633423, "logits/rejected": -2.971198558807373, "logps/chosen": -338.7559509277344, "logps/rejected": -160.9521484375, "loss": 0.8119, "rewards/accuracies": 0.5, "rewards/chosen": -0.358298122882843, "rewards/margins": -0.13543759286403656, "rewards/rejected": -0.22286053001880646, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.2900763358778627e-08, "logits/chosen": -3.717552661895752, "logits/rejected": -3.6545305252075195, "logps/chosen": -293.1756591796875, "logps/rejected": -308.94097900390625, "loss": 0.5919, "rewards/accuracies": 0.75, "rewards/chosen": -0.016703132539987564, "rewards/margins": 0.2437538057565689, "rewards/rejected": -0.260456919670105, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.5190839694656487e-08, "logits/chosen": -3.0810492038726807, "logits/rejected": -2.7138595581054688, "logps/chosen": -140.66273498535156, "logps/rejected": -198.98141479492188, "loss": 0.7935, "rewards/accuracies": 0.5, "rewards/chosen": -0.24244041740894318, "rewards/margins": -0.13574858009815216, "rewards/rejected": -0.10669183731079102, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.748091603053435e-08, "logits/chosen": -2.6906967163085938, "logits/rejected": -2.969830274581909, "logps/chosen": -533.2620239257812, "logps/rejected": -321.76251220703125, "loss": 0.914, "rewards/accuracies": 0.25, "rewards/chosen": -0.6085913777351379, "rewards/margins": -0.23153522610664368, "rewards/rejected": -0.3770561218261719, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.9770992366412212e-08, "logits/chosen": -3.072509765625, "logits/rejected": -3.102613687515259, "logps/chosen": -334.9183349609375, "logps/rejected": -248.54986572265625, "loss": 0.679, "rewards/accuracies": 0.375, "rewards/chosen": -0.20921511948108673, "rewards/margins": 0.10817017406225204, "rewards/rejected": -0.3173852860927582, "step": 13 }, { "epoch": 0.0, "learning_rate": 3.206106870229007e-08, "logits/chosen": -3.446181297302246, "logits/rejected": -3.629345417022705, "logps/chosen": -188.58091735839844, "logps/rejected": -214.6560821533203, "loss": 0.6999, "rewards/accuracies": 0.625, "rewards/chosen": -0.14689147472381592, "rewards/margins": 0.1304509937763214, "rewards/rejected": -0.27734246850013733, "step": 14 }, { "epoch": 0.0, "learning_rate": 3.435114503816794e-08, "logits/chosen": -3.1664552688598633, "logits/rejected": -3.3170418739318848, "logps/chosen": -202.69989013671875, "logps/rejected": -311.98406982421875, "loss": 0.6956, "rewards/accuracies": 0.75, "rewards/chosen": -0.19405591487884521, "rewards/margins": 0.052658870816230774, "rewards/rejected": -0.246714785695076, "step": 15 }, { "epoch": 0.0, "learning_rate": 3.66412213740458e-08, "logits/chosen": -3.3414244651794434, "logits/rejected": -3.961564302444458, "logps/chosen": -99.53945922851562, "logps/rejected": -190.3000946044922, "loss": 0.5005, "rewards/accuracies": 0.75, "rewards/chosen": -0.06142204999923706, "rewards/margins": 0.6696228981018066, "rewards/rejected": -0.7310448884963989, "step": 16 }, { "epoch": 0.0, "learning_rate": 3.893129770992366e-08, "logits/chosen": -3.0124399662017822, "logits/rejected": -2.840730667114258, "logps/chosen": -269.3916015625, "logps/rejected": -235.80328369140625, "loss": 0.8463, "rewards/accuracies": 0.5, "rewards/chosen": -0.4099459648132324, "rewards/margins": -0.1655757576227188, "rewards/rejected": -0.24437019228935242, "step": 17 }, { "epoch": 0.0, "learning_rate": 4.122137404580153e-08, "logits/chosen": -3.5484561920166016, "logits/rejected": -3.308903932571411, "logps/chosen": -267.91192626953125, "logps/rejected": -269.2580871582031, "loss": 0.8622, "rewards/accuracies": 0.25, "rewards/chosen": -0.38084864616394043, "rewards/margins": -0.15083739161491394, "rewards/rejected": -0.2300112396478653, "step": 18 }, { "epoch": 0.0, "learning_rate": 4.351145038167938e-08, "logits/chosen": -2.7539050579071045, "logits/rejected": -2.8171746730804443, "logps/chosen": -444.2049865722656, "logps/rejected": -528.9166259765625, "loss": 0.7051, "rewards/accuracies": 0.5, "rewards/chosen": -0.11224745959043503, "rewards/margins": 0.049409493803977966, "rewards/rejected": -0.1616569459438324, "step": 19 }, { "epoch": 0.0, "learning_rate": 4.5801526717557254e-08, "logits/chosen": -3.1177096366882324, "logits/rejected": -3.0110886096954346, "logps/chosen": -105.60438537597656, "logps/rejected": -201.22738647460938, "loss": 0.7114, "rewards/accuracies": 0.625, "rewards/chosen": -0.17709673941135406, "rewards/margins": -0.0056163109838962555, "rewards/rejected": -0.1714804470539093, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.809160305343511e-08, "logits/chosen": -3.28956937789917, "logits/rejected": -3.1621954441070557, "logps/chosen": -218.82611083984375, "logps/rejected": -224.52459716796875, "loss": 0.7087, "rewards/accuracies": 0.5, "rewards/chosen": -0.22030237317085266, "rewards/margins": 0.13378384709358215, "rewards/rejected": -0.3540862202644348, "step": 21 }, { "epoch": 0.0, "learning_rate": 5.038167938931297e-08, "logits/chosen": -3.541567087173462, "logits/rejected": -3.389125108718872, "logps/chosen": -234.33401489257812, "logps/rejected": -265.4866638183594, "loss": 1.0767, "rewards/accuracies": 0.375, "rewards/chosen": -0.44112852215766907, "rewards/margins": -0.5136497616767883, "rewards/rejected": 0.07252121716737747, "step": 22 }, { "epoch": 0.0, "learning_rate": 5.267175572519083e-08, "logits/chosen": -3.6236681938171387, "logits/rejected": -3.337611675262451, "logps/chosen": -161.0250244140625, "logps/rejected": -152.80413818359375, "loss": 0.8573, "rewards/accuracies": 0.375, "rewards/chosen": -0.43034422397613525, "rewards/margins": -0.14431187510490417, "rewards/rejected": -0.28603237867355347, "step": 23 }, { "epoch": 0.0, "learning_rate": 5.49618320610687e-08, "logits/chosen": -3.0478150844573975, "logits/rejected": -2.997501850128174, "logps/chosen": -262.8673095703125, "logps/rejected": -198.7688751220703, "loss": 0.7549, "rewards/accuracies": 0.375, "rewards/chosen": -0.3380405902862549, "rewards/margins": -0.09912433475255966, "rewards/rejected": -0.23891624808311462, "step": 24 }, { "epoch": 0.0, "learning_rate": 5.7251908396946565e-08, "logits/chosen": -2.9718570709228516, "logits/rejected": -2.6679067611694336, "logps/chosen": -173.62368774414062, "logps/rejected": -180.52847290039062, "loss": 0.8252, "rewards/accuracies": 0.25, "rewards/chosen": -0.6487332582473755, "rewards/margins": -0.1877424120903015, "rewards/rejected": -0.4609908163547516, "step": 25 }, { "epoch": 0.0, "learning_rate": 5.9541984732824424e-08, "logits/chosen": -3.1790597438812256, "logits/rejected": -3.1685009002685547, "logps/chosen": -451.2270812988281, "logps/rejected": -257.05303955078125, "loss": 0.9584, "rewards/accuracies": 0.5, "rewards/chosen": -0.4195936322212219, "rewards/margins": -0.36516913771629333, "rewards/rejected": -0.054424479603767395, "step": 26 }, { "epoch": 0.0, "learning_rate": 6.183206106870229e-08, "logits/chosen": -3.7453653812408447, "logits/rejected": -3.687110662460327, "logps/chosen": -293.18316650390625, "logps/rejected": -310.10235595703125, "loss": 0.7963, "rewards/accuracies": 0.375, "rewards/chosen": -0.6200013756752014, "rewards/margins": -0.025363504886627197, "rewards/rejected": -0.5946378707885742, "step": 27 }, { "epoch": 0.0, "learning_rate": 6.412213740458014e-08, "logits/chosen": -3.2591514587402344, "logits/rejected": -3.551267147064209, "logps/chosen": -105.18576049804688, "logps/rejected": -138.39620971679688, "loss": 0.6627, "rewards/accuracies": 0.5, "rewards/chosen": -0.13792753219604492, "rewards/margins": 0.1355496644973755, "rewards/rejected": -0.2734771966934204, "step": 28 }, { "epoch": 0.0, "learning_rate": 6.641221374045801e-08, "logits/chosen": -2.7191622257232666, "logits/rejected": -3.007643222808838, "logps/chosen": -506.18853759765625, "logps/rejected": -316.33709716796875, "loss": 0.9049, "rewards/accuracies": 0.25, "rewards/chosen": -0.6219179630279541, "rewards/margins": -0.28037935495376587, "rewards/rejected": -0.34153860807418823, "step": 29 }, { "epoch": 0.0, "learning_rate": 6.870229007633587e-08, "logits/chosen": -3.357670545578003, "logits/rejected": -3.2435083389282227, "logps/chosen": -334.81182861328125, "logps/rejected": -338.9449157714844, "loss": 0.9027, "rewards/accuracies": 0.25, "rewards/chosen": -0.5266609191894531, "rewards/margins": -0.25207364559173584, "rewards/rejected": -0.2745872735977173, "step": 30 }, { "epoch": 0.0, "learning_rate": 7.099236641221374e-08, "logits/chosen": -2.9724888801574707, "logits/rejected": -3.1793935298919678, "logps/chosen": -194.36685180664062, "logps/rejected": -197.75302124023438, "loss": 0.6695, "rewards/accuracies": 0.625, "rewards/chosen": -0.14510780572891235, "rewards/margins": 0.10041507333517075, "rewards/rejected": -0.2455228865146637, "step": 31 }, { "epoch": 0.0, "learning_rate": 7.32824427480916e-08, "logits/chosen": -3.429110527038574, "logits/rejected": -3.6405317783355713, "logps/chosen": -302.10711669921875, "logps/rejected": -260.0133361816406, "loss": 0.6717, "rewards/accuracies": 0.625, "rewards/chosen": -0.23328939080238342, "rewards/margins": 0.17789646983146667, "rewards/rejected": -0.4111858606338501, "step": 32 }, { "epoch": 0.0, "learning_rate": 7.557251908396946e-08, "logits/chosen": -2.4055440425872803, "logits/rejected": -2.447580337524414, "logps/chosen": -274.1697998046875, "logps/rejected": -203.53143310546875, "loss": 0.6863, "rewards/accuracies": 0.75, "rewards/chosen": -0.032405950129032135, "rewards/margins": 0.028429530560970306, "rewards/rejected": -0.06083548069000244, "step": 33 }, { "epoch": 0.0, "learning_rate": 7.786259541984733e-08, "logits/chosen": -2.4708945751190186, "logits/rejected": -2.684777021408081, "logps/chosen": -301.1230773925781, "logps/rejected": -209.44749450683594, "loss": 0.9305, "rewards/accuracies": 0.5, "rewards/chosen": -0.5941293239593506, "rewards/margins": -0.2963210642337799, "rewards/rejected": -0.2978082299232483, "step": 34 }, { "epoch": 0.0, "learning_rate": 8.015267175572519e-08, "logits/chosen": -3.1385161876678467, "logits/rejected": -3.1987080574035645, "logps/chosen": -319.5728759765625, "logps/rejected": -289.68719482421875, "loss": 0.9848, "rewards/accuracies": 0.125, "rewards/chosen": -0.709468424320221, "rewards/margins": -0.4550679922103882, "rewards/rejected": -0.25440046191215515, "step": 35 }, { "epoch": 0.0, "learning_rate": 8.244274809160306e-08, "logits/chosen": -3.3285205364227295, "logits/rejected": -3.2466812133789062, "logps/chosen": -153.22515869140625, "logps/rejected": -148.2000274658203, "loss": 0.8992, "rewards/accuracies": 0.375, "rewards/chosen": -0.5575298070907593, "rewards/margins": -0.28044992685317993, "rewards/rejected": -0.27707988023757935, "step": 36 }, { "epoch": 0.0, "learning_rate": 8.473282442748092e-08, "logits/chosen": -2.8870813846588135, "logits/rejected": -3.0496702194213867, "logps/chosen": -301.7391052246094, "logps/rejected": -248.8736572265625, "loss": 0.6407, "rewards/accuracies": 0.625, "rewards/chosen": -0.1857985556125641, "rewards/margins": 0.19316615164279938, "rewards/rejected": -0.37896469235420227, "step": 37 }, { "epoch": 0.0, "learning_rate": 8.702290076335876e-08, "logits/chosen": -2.786118507385254, "logits/rejected": -2.9129960536956787, "logps/chosen": -311.86468505859375, "logps/rejected": -313.30462646484375, "loss": 0.6125, "rewards/accuracies": 0.625, "rewards/chosen": -0.07735371589660645, "rewards/margins": 0.21646477282047272, "rewards/rejected": -0.29381847381591797, "step": 38 }, { "epoch": 0.0, "learning_rate": 8.931297709923663e-08, "logits/chosen": -2.8042986392974854, "logits/rejected": -2.8723156452178955, "logps/chosen": -321.5715637207031, "logps/rejected": -320.4477233886719, "loss": 0.6451, "rewards/accuracies": 0.75, "rewards/chosen": -0.01878397725522518, "rewards/margins": 0.12471916526556015, "rewards/rejected": -0.14350314438343048, "step": 39 }, { "epoch": 0.0, "learning_rate": 9.160305343511451e-08, "logits/chosen": -3.1336617469787598, "logits/rejected": -2.9629650115966797, "logps/chosen": -503.5635070800781, "logps/rejected": -265.04559326171875, "loss": 0.8828, "rewards/accuracies": 0.375, "rewards/chosen": -0.55012047290802, "rewards/margins": -0.273624449968338, "rewards/rejected": -0.2764959931373596, "step": 40 }, { "epoch": 0.0, "learning_rate": 9.389312977099237e-08, "logits/chosen": -2.956836700439453, "logits/rejected": -2.9640579223632812, "logps/chosen": -179.7181396484375, "logps/rejected": -188.00875854492188, "loss": 0.5287, "rewards/accuracies": 1.0, "rewards/chosen": 0.000301264226436615, "rewards/margins": 0.44891369342803955, "rewards/rejected": -0.44861239194869995, "step": 41 }, { "epoch": 0.0, "learning_rate": 9.618320610687021e-08, "logits/chosen": -3.740957736968994, "logits/rejected": -3.6143572330474854, "logps/chosen": -294.339599609375, "logps/rejected": -267.4578857421875, "loss": 0.7226, "rewards/accuracies": 0.5, "rewards/chosen": -0.12384767830371857, "rewards/margins": 0.0343015231192112, "rewards/rejected": -0.15814919769763947, "step": 42 }, { "epoch": 0.0, "learning_rate": 9.847328244274808e-08, "logits/chosen": -3.323808431625366, "logits/rejected": -3.402543067932129, "logps/chosen": -290.0013732910156, "logps/rejected": -249.97146606445312, "loss": 0.5933, "rewards/accuracies": 0.75, "rewards/chosen": 0.0443418025970459, "rewards/margins": 0.22824504971504211, "rewards/rejected": -0.18390324711799622, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.0076335877862595e-07, "logits/chosen": -3.105375289916992, "logits/rejected": -3.2476706504821777, "logps/chosen": -180.52053833007812, "logps/rejected": -168.6074676513672, "loss": 0.6973, "rewards/accuracies": 0.5, "rewards/chosen": -0.30576640367507935, "rewards/margins": 0.009885972365736961, "rewards/rejected": -0.31565237045288086, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.0305343511450381e-07, "logits/chosen": -3.436368465423584, "logits/rejected": -3.6154918670654297, "logps/chosen": -190.18783569335938, "logps/rejected": -251.2783660888672, "loss": 0.6095, "rewards/accuracies": 0.625, "rewards/chosen": -0.2333325445652008, "rewards/margins": 0.27039211988449097, "rewards/rejected": -0.5037246346473694, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.0534351145038167e-07, "logits/chosen": -3.218198776245117, "logits/rejected": -3.23685884475708, "logps/chosen": -222.12570190429688, "logps/rejected": -240.62774658203125, "loss": 0.7216, "rewards/accuracies": 0.5, "rewards/chosen": -0.2075226604938507, "rewards/margins": -0.04449383169412613, "rewards/rejected": -0.1630288064479828, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.0763358778625953e-07, "logits/chosen": -3.4242541790008545, "logits/rejected": -3.4930057525634766, "logps/chosen": -446.5312805175781, "logps/rejected": -243.98568725585938, "loss": 0.7223, "rewards/accuracies": 0.625, "rewards/chosen": -0.1561458706855774, "rewards/margins": -0.028374865651130676, "rewards/rejected": -0.12777099013328552, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.099236641221374e-07, "logits/chosen": -3.3152806758880615, "logits/rejected": -3.234919786453247, "logps/chosen": -198.54061889648438, "logps/rejected": -158.94789123535156, "loss": 0.8807, "rewards/accuracies": 0.25, "rewards/chosen": -0.4782709777355194, "rewards/margins": -0.3029758036136627, "rewards/rejected": -0.1752951741218567, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.1221374045801526e-07, "logits/chosen": -3.510044574737549, "logits/rejected": -3.3472442626953125, "logps/chosen": -252.0076904296875, "logps/rejected": -188.69393920898438, "loss": 0.7912, "rewards/accuracies": 0.5, "rewards/chosen": -0.2720061242580414, "rewards/margins": -0.1420605331659317, "rewards/rejected": -0.1299455612897873, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.1450381679389313e-07, "logits/chosen": -2.311176061630249, "logits/rejected": -2.34525990486145, "logps/chosen": -348.6552734375, "logps/rejected": -330.98394775390625, "loss": 0.6744, "rewards/accuracies": 0.625, "rewards/chosen": -0.20904523134231567, "rewards/margins": 0.12434081733226776, "rewards/rejected": -0.33338606357574463, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.1679389312977098e-07, "logits/chosen": -2.6295225620269775, "logits/rejected": -2.6080965995788574, "logps/chosen": -166.45623779296875, "logps/rejected": -237.57711791992188, "loss": 0.4807, "rewards/accuracies": 1.0, "rewards/chosen": -0.19380536675453186, "rewards/margins": 0.9976029396057129, "rewards/rejected": -1.1914082765579224, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.1908396946564885e-07, "logits/chosen": -3.254615068435669, "logits/rejected": -3.429771900177002, "logps/chosen": -275.1093444824219, "logps/rejected": -261.70440673828125, "loss": 0.6478, "rewards/accuracies": 0.75, "rewards/chosen": -0.32171791791915894, "rewards/margins": 0.18373233079910278, "rewards/rejected": -0.5054502487182617, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.2137404580152673e-07, "logits/chosen": -2.9028823375701904, "logits/rejected": -2.97601318359375, "logps/chosen": -273.58074951171875, "logps/rejected": -264.04998779296875, "loss": 0.6445, "rewards/accuracies": 0.625, "rewards/chosen": -0.10398735851049423, "rewards/margins": 0.13454608619213104, "rewards/rejected": -0.23853343725204468, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.2366412213740458e-07, "logits/chosen": -3.6550729274749756, "logits/rejected": -3.7607603073120117, "logps/chosen": -283.7383117675781, "logps/rejected": -102.44178771972656, "loss": 0.847, "rewards/accuracies": 0.5, "rewards/chosen": -0.22564618289470673, "rewards/margins": -0.20799270272254944, "rewards/rejected": -0.017653487622737885, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.2595419847328243e-07, "logits/chosen": -3.3850491046905518, "logits/rejected": -3.008723258972168, "logps/chosen": -225.89337158203125, "logps/rejected": -225.0657196044922, "loss": 0.9198, "rewards/accuracies": 0.25, "rewards/chosen": -0.3809661865234375, "rewards/margins": -0.34977200627326965, "rewards/rejected": -0.031194206327199936, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.2824427480916029e-07, "logits/chosen": -3.1954517364501953, "logits/rejected": -3.2309398651123047, "logps/chosen": -250.4422149658203, "logps/rejected": -288.80108642578125, "loss": 0.6738, "rewards/accuracies": 0.75, "rewards/chosen": -0.0924837589263916, "rewards/margins": 0.08327709138393402, "rewards/rejected": -0.17576085031032562, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.3053435114503817e-07, "logits/chosen": -3.1056759357452393, "logits/rejected": -3.3055460453033447, "logps/chosen": -302.9123840332031, "logps/rejected": -377.08990478515625, "loss": 0.6432, "rewards/accuracies": 0.625, "rewards/chosen": -0.1020759865641594, "rewards/margins": 0.17593111097812653, "rewards/rejected": -0.2780070900917053, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.3282442748091602e-07, "logits/chosen": -3.499239921569824, "logits/rejected": -3.706509590148926, "logps/chosen": -210.54481506347656, "logps/rejected": -177.09408569335938, "loss": 0.7282, "rewards/accuracies": 0.5, "rewards/chosen": -0.13845963776111603, "rewards/margins": -0.05474615469574928, "rewards/rejected": -0.08371348679065704, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.3511450381679387e-07, "logits/chosen": -3.2795305252075195, "logits/rejected": -3.40950345993042, "logps/chosen": -227.6250457763672, "logps/rejected": -253.87281799316406, "loss": 0.6866, "rewards/accuracies": 0.5, "rewards/chosen": -0.01637895405292511, "rewards/margins": 0.05221076309680939, "rewards/rejected": -0.0685897171497345, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.3740458015267175e-07, "logits/chosen": -2.739415168762207, "logits/rejected": -2.7073135375976562, "logps/chosen": -263.2015075683594, "logps/rejected": -264.9258728027344, "loss": 0.612, "rewards/accuracies": 0.625, "rewards/chosen": -0.0466853603720665, "rewards/margins": 0.2469082474708557, "rewards/rejected": -0.2935935854911804, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.396946564885496e-07, "logits/chosen": -3.107130765914917, "logits/rejected": -3.1482627391815186, "logps/chosen": -232.05810546875, "logps/rejected": -217.86846923828125, "loss": 0.64, "rewards/accuracies": 0.625, "rewards/chosen": -0.35781604051589966, "rewards/margins": 0.1471811830997467, "rewards/rejected": -0.504997193813324, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.4198473282442748e-07, "logits/chosen": -3.232652187347412, "logits/rejected": -3.2418293952941895, "logps/chosen": -293.41375732421875, "logps/rejected": -176.07763671875, "loss": 0.8971, "rewards/accuracies": 0.25, "rewards/chosen": -0.5810433626174927, "rewards/margins": -0.1955834925174713, "rewards/rejected": -0.3854598104953766, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.4427480916030533e-07, "logits/chosen": -3.355353355407715, "logits/rejected": -2.914226531982422, "logps/chosen": -312.8883361816406, "logps/rejected": -245.61172485351562, "loss": 0.7297, "rewards/accuracies": 0.625, "rewards/chosen": -0.30815935134887695, "rewards/margins": 0.03318149596452713, "rewards/rejected": -0.3413408398628235, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.465648854961832e-07, "logits/chosen": -3.356411933898926, "logits/rejected": -3.5593082904815674, "logps/chosen": -190.41754150390625, "logps/rejected": -216.968017578125, "loss": 0.5685, "rewards/accuracies": 0.875, "rewards/chosen": -0.08559704571962357, "rewards/margins": 0.34894007444381714, "rewards/rejected": -0.4345371127128601, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.4885496183206107e-07, "logits/chosen": -3.1831443309783936, "logits/rejected": -3.4159247875213623, "logps/chosen": -246.69711303710938, "logps/rejected": -378.2087097167969, "loss": 0.6722, "rewards/accuracies": 0.625, "rewards/chosen": -0.21070018410682678, "rewards/margins": 0.09936267137527466, "rewards/rejected": -0.31006285548210144, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.5114503816793892e-07, "logits/chosen": -3.176586389541626, "logits/rejected": -3.2331390380859375, "logps/chosen": -145.62741088867188, "logps/rejected": -201.29031372070312, "loss": 0.6863, "rewards/accuracies": 0.375, "rewards/chosen": -0.00719447061419487, "rewards/margins": 0.04938926920294762, "rewards/rejected": -0.05658373981714249, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.5343511450381677e-07, "logits/chosen": -3.2608749866485596, "logits/rejected": -3.1006011962890625, "logps/chosen": -295.36663818359375, "logps/rejected": -251.03538513183594, "loss": 0.7539, "rewards/accuracies": 0.375, "rewards/chosen": -0.24826902151107788, "rewards/margins": -0.10409437119960785, "rewards/rejected": -0.14417466521263123, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.5572519083969465e-07, "logits/chosen": -2.817625045776367, "logits/rejected": -2.641526460647583, "logps/chosen": -312.7576599121094, "logps/rejected": -238.22512817382812, "loss": 0.6838, "rewards/accuracies": 0.625, "rewards/chosen": -0.16015835106372833, "rewards/margins": 0.04708592966198921, "rewards/rejected": -0.20724429190158844, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.580152671755725e-07, "logits/chosen": -2.7484593391418457, "logits/rejected": -2.7156190872192383, "logps/chosen": -355.67657470703125, "logps/rejected": -314.1708679199219, "loss": 0.6291, "rewards/accuracies": 0.75, "rewards/chosen": -0.046055540442466736, "rewards/margins": 0.16284514963626862, "rewards/rejected": -0.20890067517757416, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.6030534351145038e-07, "logits/chosen": -3.5092968940734863, "logits/rejected": -3.4158108234405518, "logps/chosen": -410.8740234375, "logps/rejected": -239.82080078125, "loss": 0.8419, "rewards/accuracies": 0.375, "rewards/chosen": -0.26288700103759766, "rewards/margins": -0.22411823272705078, "rewards/rejected": -0.038768768310546875, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6259541984732824e-07, "logits/chosen": -3.689443588256836, "logits/rejected": -3.8457741737365723, "logps/chosen": -201.53549194335938, "logps/rejected": -196.2528839111328, "loss": 0.7281, "rewards/accuracies": 0.625, "rewards/chosen": -0.5380469560623169, "rewards/margins": 0.03280482441186905, "rewards/rejected": -0.5708518028259277, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.6488549618320612e-07, "logits/chosen": -3.6252548694610596, "logits/rejected": -3.703477621078491, "logps/chosen": -186.8706817626953, "logps/rejected": -206.57540893554688, "loss": 0.7224, "rewards/accuracies": 0.5, "rewards/chosen": -0.2830352783203125, "rewards/margins": 0.10716373473405838, "rewards/rejected": -0.39019903540611267, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.6717557251908397e-07, "logits/chosen": -3.7435011863708496, "logits/rejected": -3.344494104385376, "logps/chosen": -408.44549560546875, "logps/rejected": -195.21826171875, "loss": 0.6231, "rewards/accuracies": 0.75, "rewards/chosen": -0.12293081730604172, "rewards/margins": 0.21756964921951294, "rewards/rejected": -0.34050044417381287, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.6946564885496185e-07, "logits/chosen": -3.2026569843292236, "logits/rejected": -3.049459934234619, "logps/chosen": -168.80950927734375, "logps/rejected": -109.94509887695312, "loss": 0.8584, "rewards/accuracies": 0.375, "rewards/chosen": -0.355167031288147, "rewards/margins": -0.14064092934131622, "rewards/rejected": -0.21452608704566956, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.7175572519083967e-07, "logits/chosen": -3.384188652038574, "logits/rejected": -3.38889479637146, "logps/chosen": -136.00726318359375, "logps/rejected": -167.09767150878906, "loss": 0.585, "rewards/accuracies": 0.75, "rewards/chosen": -0.009287472814321518, "rewards/margins": 0.2863180935382843, "rewards/rejected": -0.2956055700778961, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.7404580152671753e-07, "logits/chosen": -3.215665340423584, "logits/rejected": -3.5976943969726562, "logps/chosen": -208.8179473876953, "logps/rejected": -237.8899688720703, "loss": 0.5989, "rewards/accuracies": 0.625, "rewards/chosen": -0.22458000481128693, "rewards/margins": 0.2855520248413086, "rewards/rejected": -0.5101320743560791, "step": 76 }, { "epoch": 0.01, "learning_rate": 1.763358778625954e-07, "logits/chosen": -3.6791999340057373, "logits/rejected": -3.5767288208007812, "logps/chosen": -192.86703491210938, "logps/rejected": -144.51394653320312, "loss": 0.6796, "rewards/accuracies": 0.5, "rewards/chosen": -0.1616715043783188, "rewards/margins": 0.09672967344522476, "rewards/rejected": -0.25840115547180176, "step": 77 }, { "epoch": 0.01, "learning_rate": 1.7862595419847326e-07, "logits/chosen": -3.0993099212646484, "logits/rejected": -3.1729578971862793, "logps/chosen": -133.24313354492188, "logps/rejected": -268.46734619140625, "loss": 0.7985, "rewards/accuracies": 0.25, "rewards/chosen": -0.3121618330478668, "rewards/margins": -0.14207322895526886, "rewards/rejected": -0.17008860409259796, "step": 78 }, { "epoch": 0.01, "learning_rate": 1.8091603053435114e-07, "logits/chosen": -2.547306537628174, "logits/rejected": -2.672288179397583, "logps/chosen": -510.82281494140625, "logps/rejected": -329.476806640625, "loss": 0.8966, "rewards/accuracies": 0.25, "rewards/chosen": -0.27125856280326843, "rewards/margins": -0.32742053270339966, "rewards/rejected": 0.05616197735071182, "step": 79 }, { "epoch": 0.01, "learning_rate": 1.8320610687022902e-07, "logits/chosen": -3.897907257080078, "logits/rejected": -3.639875888824463, "logps/chosen": -283.5416259765625, "logps/rejected": -196.81964111328125, "loss": 0.6515, "rewards/accuracies": 0.375, "rewards/chosen": -0.23461467027664185, "rewards/margins": 0.17349721491336823, "rewards/rejected": -0.4081118404865265, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.8549618320610687e-07, "logits/chosen": -2.970639228820801, "logits/rejected": -2.8881149291992188, "logps/chosen": -190.8037109375, "logps/rejected": -248.2021026611328, "loss": 1.0786, "rewards/accuracies": 0.125, "rewards/chosen": -0.45137572288513184, "rewards/margins": -0.5815427303314209, "rewards/rejected": 0.13016700744628906, "step": 81 }, { "epoch": 0.01, "learning_rate": 1.8778625954198475e-07, "logits/chosen": -2.622469425201416, "logits/rejected": -2.6842331886291504, "logps/chosen": -524.6024169921875, "logps/rejected": -383.9125671386719, "loss": 0.6993, "rewards/accuracies": 0.625, "rewards/chosen": -0.3260946273803711, "rewards/margins": 0.0016674511134624481, "rewards/rejected": -0.32776206731796265, "step": 82 }, { "epoch": 0.01, "learning_rate": 1.9007633587786258e-07, "logits/chosen": -2.657881259918213, "logits/rejected": -2.80505108833313, "logps/chosen": -194.5184326171875, "logps/rejected": -249.5345458984375, "loss": 0.5936, "rewards/accuracies": 0.75, "rewards/chosen": -0.01869310811161995, "rewards/margins": 0.2896742522716522, "rewards/rejected": -0.3083673417568207, "step": 83 }, { "epoch": 0.01, "learning_rate": 1.9236641221374043e-07, "logits/chosen": -3.2683839797973633, "logits/rejected": -3.5612690448760986, "logps/chosen": -90.61796569824219, "logps/rejected": -216.69204711914062, "loss": 0.513, "rewards/accuracies": 0.625, "rewards/chosen": 0.2816338539123535, "rewards/margins": 0.6866769194602966, "rewards/rejected": -0.4050430357456207, "step": 84 }, { "epoch": 0.01, "learning_rate": 1.946564885496183e-07, "logits/chosen": -3.343080520629883, "logits/rejected": -3.335479497909546, "logps/chosen": -188.54107666015625, "logps/rejected": -179.77496337890625, "loss": 0.669, "rewards/accuracies": 0.625, "rewards/chosen": -0.28755441308021545, "rewards/margins": 0.09143888205289841, "rewards/rejected": -0.37899330258369446, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.9694656488549616e-07, "logits/chosen": -3.7428441047668457, "logits/rejected": -3.585681438446045, "logps/chosen": -316.42376708984375, "logps/rejected": -277.25347900390625, "loss": 0.8771, "rewards/accuracies": 0.5, "rewards/chosen": -0.40692636370658875, "rewards/margins": 0.04885232448577881, "rewards/rejected": -0.45577865839004517, "step": 86 }, { "epoch": 0.01, "learning_rate": 1.9923664122137404e-07, "logits/chosen": -3.7199385166168213, "logits/rejected": -3.6908345222473145, "logps/chosen": -286.1842346191406, "logps/rejected": -298.1600646972656, "loss": 0.7075, "rewards/accuracies": 0.625, "rewards/chosen": -0.3951358497142792, "rewards/margins": 0.15575389564037323, "rewards/rejected": -0.5508897304534912, "step": 87 }, { "epoch": 0.01, "learning_rate": 2.015267175572519e-07, "logits/chosen": -3.4288408756256104, "logits/rejected": -3.425314426422119, "logps/chosen": -249.10635375976562, "logps/rejected": -215.15084838867188, "loss": 0.7498, "rewards/accuracies": 0.375, "rewards/chosen": -0.3369584381580353, "rewards/margins": -0.057854074984788895, "rewards/rejected": -0.2791043817996979, "step": 88 }, { "epoch": 0.01, "learning_rate": 2.0381679389312977e-07, "logits/chosen": -3.7518229484558105, "logits/rejected": -3.4919657707214355, "logps/chosen": -363.00244140625, "logps/rejected": -267.1479187011719, "loss": 0.9747, "rewards/accuracies": 0.25, "rewards/chosen": -0.6541010141372681, "rewards/margins": -0.43367499113082886, "rewards/rejected": -0.2204260379076004, "step": 89 }, { "epoch": 0.01, "learning_rate": 2.0610687022900762e-07, "logits/chosen": -2.840808629989624, "logits/rejected": -2.660957098007202, "logps/chosen": -231.73004150390625, "logps/rejected": -251.68307495117188, "loss": 1.2391, "rewards/accuracies": 0.375, "rewards/chosen": -0.837734043598175, "rewards/margins": -0.6147798299789429, "rewards/rejected": -0.22295422852039337, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.083969465648855e-07, "logits/chosen": -2.8722660541534424, "logits/rejected": -3.3833670616149902, "logps/chosen": -108.17529296875, "logps/rejected": -186.573486328125, "loss": 0.532, "rewards/accuracies": 0.625, "rewards/chosen": -0.03559593856334686, "rewards/margins": 0.44083544611930847, "rewards/rejected": -0.47643136978149414, "step": 91 }, { "epoch": 0.01, "learning_rate": 2.1068702290076333e-07, "logits/chosen": -2.808424711227417, "logits/rejected": -2.9115793704986572, "logps/chosen": -279.8440856933594, "logps/rejected": -182.3898468017578, "loss": 0.7081, "rewards/accuracies": 0.5, "rewards/chosen": -0.20777331292629242, "rewards/margins": 0.06929311901330948, "rewards/rejected": -0.2770664393901825, "step": 92 }, { "epoch": 0.01, "learning_rate": 2.129770992366412e-07, "logits/chosen": -3.868560314178467, "logits/rejected": -3.631479263305664, "logps/chosen": -395.3164367675781, "logps/rejected": -268.59808349609375, "loss": 0.6555, "rewards/accuracies": 0.625, "rewards/chosen": -0.10652284324169159, "rewards/margins": 0.22148486971855164, "rewards/rejected": -0.32800769805908203, "step": 93 }, { "epoch": 0.01, "learning_rate": 2.1526717557251906e-07, "logits/chosen": -2.4958600997924805, "logits/rejected": -2.588447332382202, "logps/chosen": -165.06683349609375, "logps/rejected": -252.86849975585938, "loss": 0.8546, "rewards/accuracies": 0.5, "rewards/chosen": -0.3297153413295746, "rewards/margins": -0.17414125800132751, "rewards/rejected": -0.15557409822940826, "step": 94 }, { "epoch": 0.01, "learning_rate": 2.1755725190839694e-07, "logits/chosen": -4.081074237823486, "logits/rejected": -3.84214448928833, "logps/chosen": -268.2448425292969, "logps/rejected": -181.27870178222656, "loss": 0.6577, "rewards/accuracies": 0.625, "rewards/chosen": -0.19969063997268677, "rewards/margins": 0.13628439605236053, "rewards/rejected": -0.3359750211238861, "step": 95 }, { "epoch": 0.01, "learning_rate": 2.198473282442748e-07, "logits/chosen": -2.5362308025360107, "logits/rejected": -2.412184715270996, "logps/chosen": -210.59732055664062, "logps/rejected": -203.92547607421875, "loss": 0.5965, "rewards/accuracies": 0.875, "rewards/chosen": -0.029003728181123734, "rewards/margins": 0.2204502522945404, "rewards/rejected": -0.24945397675037384, "step": 96 }, { "epoch": 0.01, "learning_rate": 2.2213740458015267e-07, "logits/chosen": -2.621870994567871, "logits/rejected": -2.678809642791748, "logps/chosen": -270.0916442871094, "logps/rejected": -223.74954223632812, "loss": 0.8207, "rewards/accuracies": 0.625, "rewards/chosen": -0.21331731975078583, "rewards/margins": -0.17596152424812317, "rewards/rejected": -0.037355780601501465, "step": 97 }, { "epoch": 0.01, "learning_rate": 2.2442748091603053e-07, "logits/chosen": -3.2101831436157227, "logits/rejected": -3.0311570167541504, "logps/chosen": -204.61627197265625, "logps/rejected": -240.28762817382812, "loss": 0.6831, "rewards/accuracies": 0.5, "rewards/chosen": -0.21028414368629456, "rewards/margins": 0.12435102462768555, "rewards/rejected": -0.3346351683139801, "step": 98 }, { "epoch": 0.01, "learning_rate": 2.267175572519084e-07, "logits/chosen": -2.986084461212158, "logits/rejected": -3.260254383087158, "logps/chosen": -232.95578002929688, "logps/rejected": -339.60125732421875, "loss": 0.6849, "rewards/accuracies": 0.625, "rewards/chosen": -0.29341188073158264, "rewards/margins": 0.08152984082698822, "rewards/rejected": -0.37494170665740967, "step": 99 }, { "epoch": 0.01, "learning_rate": 2.2900763358778626e-07, "logits/chosen": -3.2261757850646973, "logits/rejected": -3.233630418777466, "logps/chosen": -133.8319091796875, "logps/rejected": -266.5990295410156, "loss": 0.6538, "rewards/accuracies": 0.625, "rewards/chosen": -0.2386825531721115, "rewards/margins": 0.14204590022563934, "rewards/rejected": -0.38072848320007324, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.3129770992366408e-07, "logits/chosen": -2.8640241622924805, "logits/rejected": -3.1998090744018555, "logps/chosen": -154.29061889648438, "logps/rejected": -190.56939697265625, "loss": 0.664, "rewards/accuracies": 0.75, "rewards/chosen": -0.45760613679885864, "rewards/margins": 0.39487671852111816, "rewards/rejected": -0.852482795715332, "step": 101 }, { "epoch": 0.01, "learning_rate": 2.3358778625954196e-07, "logits/chosen": -3.082688808441162, "logits/rejected": -3.0252885818481445, "logps/chosen": -237.9062042236328, "logps/rejected": -252.95895385742188, "loss": 0.6791, "rewards/accuracies": 0.5, "rewards/chosen": -0.11365075409412384, "rewards/margins": 0.0526578426361084, "rewards/rejected": -0.16630861163139343, "step": 102 }, { "epoch": 0.01, "learning_rate": 2.3587786259541982e-07, "logits/chosen": -2.893190860748291, "logits/rejected": -2.84110951423645, "logps/chosen": -242.72274780273438, "logps/rejected": -234.75003051757812, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -0.07537122070789337, "rewards/margins": 0.6416997313499451, "rewards/rejected": -0.7170709371566772, "step": 103 }, { "epoch": 0.01, "learning_rate": 2.381679389312977e-07, "logits/chosen": -2.6073660850524902, "logits/rejected": -2.7198150157928467, "logps/chosen": -468.8730773925781, "logps/rejected": -289.12579345703125, "loss": 0.7131, "rewards/accuracies": 0.5, "rewards/chosen": -0.37968218326568604, "rewards/margins": 0.021421197801828384, "rewards/rejected": -0.4011034369468689, "step": 104 }, { "epoch": 0.01, "learning_rate": 2.4045801526717555e-07, "logits/chosen": -2.5145530700683594, "logits/rejected": -2.4582180976867676, "logps/chosen": -374.68780517578125, "logps/rejected": -169.569091796875, "loss": 0.9596, "rewards/accuracies": 0.375, "rewards/chosen": -0.36867406964302063, "rewards/margins": -0.38278090953826904, "rewards/rejected": 0.01410684734582901, "step": 105 }, { "epoch": 0.01, "learning_rate": 2.4274809160305345e-07, "logits/chosen": -2.3901045322418213, "logits/rejected": -2.6872684955596924, "logps/chosen": -507.0616455078125, "logps/rejected": -330.80035400390625, "loss": 0.7278, "rewards/accuracies": 0.375, "rewards/chosen": -0.35140591859817505, "rewards/margins": 0.08202869445085526, "rewards/rejected": -0.4334346055984497, "step": 106 }, { "epoch": 0.01, "learning_rate": 2.450381679389313e-07, "logits/chosen": -3.057403326034546, "logits/rejected": -3.0108768939971924, "logps/chosen": -148.05653381347656, "logps/rejected": -149.05780029296875, "loss": 0.6064, "rewards/accuracies": 0.75, "rewards/chosen": 0.01555030420422554, "rewards/margins": 0.20563119649887085, "rewards/rejected": -0.19008088111877441, "step": 107 }, { "epoch": 0.01, "learning_rate": 2.4732824427480916e-07, "logits/chosen": -2.9712204933166504, "logits/rejected": -3.286478042602539, "logps/chosen": -191.14633178710938, "logps/rejected": -256.52520751953125, "loss": 0.7175, "rewards/accuracies": 0.5, "rewards/chosen": -0.06530895829200745, "rewards/margins": -0.015452280640602112, "rewards/rejected": -0.04985666275024414, "step": 108 }, { "epoch": 0.01, "learning_rate": 2.49618320610687e-07, "logits/chosen": -3.298278331756592, "logits/rejected": -3.1989502906799316, "logps/chosen": -280.6126708984375, "logps/rejected": -299.59027099609375, "loss": 0.7241, "rewards/accuracies": 0.625, "rewards/chosen": -0.13395948708057404, "rewards/margins": 0.010253429412841797, "rewards/rejected": -0.14421290159225464, "step": 109 }, { "epoch": 0.01, "learning_rate": 2.5190839694656487e-07, "logits/chosen": -3.0720670223236084, "logits/rejected": -3.1476120948791504, "logps/chosen": -265.6000061035156, "logps/rejected": -212.43777465820312, "loss": 0.6345, "rewards/accuracies": 0.625, "rewards/chosen": -0.23672738671302795, "rewards/margins": 0.293150931596756, "rewards/rejected": -0.5298783183097839, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.541984732824427e-07, "logits/chosen": -3.446840524673462, "logits/rejected": -3.202186346054077, "logps/chosen": -222.1020965576172, "logps/rejected": -199.55259704589844, "loss": 0.6789, "rewards/accuracies": 0.375, "rewards/chosen": -0.1506931185722351, "rewards/margins": 0.06943853199481964, "rewards/rejected": -0.22013165056705475, "step": 111 }, { "epoch": 0.01, "learning_rate": 2.5648854961832057e-07, "logits/chosen": -3.349372625350952, "logits/rejected": -3.331946849822998, "logps/chosen": -184.6533660888672, "logps/rejected": -186.05755615234375, "loss": 0.5072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0090029276907444, "rewards/margins": 0.45447224378585815, "rewards/rejected": -0.46347516775131226, "step": 112 }, { "epoch": 0.01, "learning_rate": 2.587786259541985e-07, "logits/chosen": -3.6324164867401123, "logits/rejected": -3.5397114753723145, "logps/chosen": -242.62844848632812, "logps/rejected": -219.70465087890625, "loss": 0.676, "rewards/accuracies": 0.375, "rewards/chosen": -0.2702448070049286, "rewards/margins": 0.21820223331451416, "rewards/rejected": -0.48844704031944275, "step": 113 }, { "epoch": 0.01, "learning_rate": 2.6106870229007633e-07, "logits/chosen": -2.633464813232422, "logits/rejected": -2.981417655944824, "logps/chosen": -439.77032470703125, "logps/rejected": -337.91033935546875, "loss": 0.7016, "rewards/accuracies": 0.5, "rewards/chosen": -0.32548338174819946, "rewards/margins": 0.012251023203134537, "rewards/rejected": -0.3377344012260437, "step": 114 }, { "epoch": 0.01, "learning_rate": 2.633587786259542e-07, "logits/chosen": -3.2973833084106445, "logits/rejected": -3.400735378265381, "logps/chosen": -138.05523681640625, "logps/rejected": -180.0774383544922, "loss": 0.6534, "rewards/accuracies": 0.625, "rewards/chosen": -0.04633617028594017, "rewards/margins": 0.21817225217819214, "rewards/rejected": -0.26450836658477783, "step": 115 }, { "epoch": 0.01, "learning_rate": 2.6564885496183204e-07, "logits/chosen": -3.3439953327178955, "logits/rejected": -3.0374372005462646, "logps/chosen": -260.5849609375, "logps/rejected": -185.37100219726562, "loss": 0.8746, "rewards/accuracies": 0.25, "rewards/chosen": -0.5902585387229919, "rewards/margins": -0.2743756175041199, "rewards/rejected": -0.31588292121887207, "step": 116 }, { "epoch": 0.01, "learning_rate": 2.6793893129770994e-07, "logits/chosen": -3.410749912261963, "logits/rejected": -3.595094680786133, "logps/chosen": -108.80453491210938, "logps/rejected": -123.71438598632812, "loss": 0.6974, "rewards/accuracies": 0.625, "rewards/chosen": -0.03910769149661064, "rewards/margins": 0.10077085345983505, "rewards/rejected": -0.1398785263299942, "step": 117 }, { "epoch": 0.01, "learning_rate": 2.7022900763358774e-07, "logits/chosen": -3.1846394538879395, "logits/rejected": -3.3248443603515625, "logps/chosen": -129.10328674316406, "logps/rejected": -176.6233367919922, "loss": 0.6864, "rewards/accuracies": 0.5, "rewards/chosen": -0.2899637222290039, "rewards/margins": 0.16040734946727753, "rewards/rejected": -0.45037105679512024, "step": 118 }, { "epoch": 0.01, "learning_rate": 2.7251908396946565e-07, "logits/chosen": -3.4267988204956055, "logits/rejected": -3.3199591636657715, "logps/chosen": -207.3040771484375, "logps/rejected": -240.88040161132812, "loss": 0.6593, "rewards/accuracies": 0.75, "rewards/chosen": -0.06342926621437073, "rewards/margins": 0.19744788110256195, "rewards/rejected": -0.2608771324157715, "step": 119 }, { "epoch": 0.01, "learning_rate": 2.748091603053435e-07, "logits/chosen": -2.868412494659424, "logits/rejected": -2.5516269207000732, "logps/chosen": -210.47901916503906, "logps/rejected": -158.34957885742188, "loss": 0.7778, "rewards/accuracies": 0.375, "rewards/chosen": -0.354963481426239, "rewards/margins": -0.040900036692619324, "rewards/rejected": -0.31406348943710327, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.7709923664122135e-07, "logits/chosen": -3.559480905532837, "logits/rejected": -3.4399425983428955, "logps/chosen": -209.5469970703125, "logps/rejected": -328.025146484375, "loss": 0.7457, "rewards/accuracies": 0.375, "rewards/chosen": 0.024084679782390594, "rewards/margins": 0.13470521569252014, "rewards/rejected": -0.11062048375606537, "step": 121 }, { "epoch": 0.01, "learning_rate": 2.793893129770992e-07, "logits/chosen": -3.115969657897949, "logits/rejected": -3.3250746726989746, "logps/chosen": -136.64122009277344, "logps/rejected": -129.7142333984375, "loss": 0.7404, "rewards/accuracies": 0.375, "rewards/chosen": -0.10612975060939789, "rewards/margins": -0.06728742271661758, "rewards/rejected": -0.03884231299161911, "step": 122 }, { "epoch": 0.01, "learning_rate": 2.816793893129771e-07, "logits/chosen": -2.689939260482788, "logits/rejected": -2.5061988830566406, "logps/chosen": -154.2698211669922, "logps/rejected": -311.65301513671875, "loss": 0.7122, "rewards/accuracies": 0.375, "rewards/chosen": -0.03599357604980469, "rewards/margins": -0.006336741149425507, "rewards/rejected": -0.029656827449798584, "step": 123 }, { "epoch": 0.01, "learning_rate": 2.8396946564885496e-07, "logits/chosen": -2.684889793395996, "logits/rejected": -2.8223021030426025, "logps/chosen": -223.34930419921875, "logps/rejected": -322.0392150878906, "loss": 0.7694, "rewards/accuracies": 0.5, "rewards/chosen": -0.25060153007507324, "rewards/margins": -0.10317506641149521, "rewards/rejected": -0.14742647111415863, "step": 124 }, { "epoch": 0.01, "learning_rate": 2.862595419847328e-07, "logits/chosen": -2.6810455322265625, "logits/rejected": -2.955416202545166, "logps/chosen": -180.22251892089844, "logps/rejected": -207.12493896484375, "loss": 0.8187, "rewards/accuracies": 0.25, "rewards/chosen": -0.3565467596054077, "rewards/margins": -0.14117440581321716, "rewards/rejected": -0.21537233889102936, "step": 125 }, { "epoch": 0.01, "learning_rate": 2.8854961832061067e-07, "logits/chosen": -2.4012675285339355, "logits/rejected": -2.5587844848632812, "logps/chosen": -276.8446960449219, "logps/rejected": -217.76889038085938, "loss": 0.7352, "rewards/accuracies": 0.25, "rewards/chosen": -0.19575592875480652, "rewards/margins": -0.04734290391206741, "rewards/rejected": -0.1484130471944809, "step": 126 }, { "epoch": 0.01, "learning_rate": 2.908396946564885e-07, "logits/chosen": -3.5057852268218994, "logits/rejected": -3.3522205352783203, "logps/chosen": -171.24147033691406, "logps/rejected": -155.68870544433594, "loss": 0.746, "rewards/accuracies": 0.375, "rewards/chosen": -0.1005580872297287, "rewards/margins": 0.0946294516324997, "rewards/rejected": -0.1951875239610672, "step": 127 }, { "epoch": 0.01, "learning_rate": 2.931297709923664e-07, "logits/chosen": -3.698072671890259, "logits/rejected": -3.657116651535034, "logps/chosen": -301.0848388671875, "logps/rejected": -367.31622314453125, "loss": 0.4862, "rewards/accuracies": 0.75, "rewards/chosen": 0.06704981625080109, "rewards/margins": 0.8873828649520874, "rewards/rejected": -0.8203331232070923, "step": 128 }, { "epoch": 0.01, "learning_rate": 2.9541984732824423e-07, "logits/chosen": -2.964540481567383, "logits/rejected": -2.9919073581695557, "logps/chosen": -231.81051635742188, "logps/rejected": -252.814453125, "loss": 0.7622, "rewards/accuracies": 0.5, "rewards/chosen": -0.2602808475494385, "rewards/margins": 0.28699302673339844, "rewards/rejected": -0.5472738146781921, "step": 129 }, { "epoch": 0.01, "learning_rate": 2.9770992366412213e-07, "logits/chosen": -3.481682538986206, "logits/rejected": -3.518353223800659, "logps/chosen": -190.08633422851562, "logps/rejected": -166.47813415527344, "loss": 0.5496, "rewards/accuracies": 0.875, "rewards/chosen": -0.13021288812160492, "rewards/margins": 0.5465818047523499, "rewards/rejected": -0.6767946481704712, "step": 130 }, { "epoch": 0.02, "learning_rate": 3e-07, "logits/chosen": -2.5496842861175537, "logits/rejected": -2.475698709487915, "logps/chosen": -268.1327209472656, "logps/rejected": -337.38543701171875, "loss": 0.6545, "rewards/accuracies": 0.625, "rewards/chosen": -0.058471158146858215, "rewards/margins": 0.12006109207868576, "rewards/rejected": -0.17853224277496338, "step": 131 }, { "epoch": 0.02, "learning_rate": 2.9996488353037574e-07, "logits/chosen": -3.3209218978881836, "logits/rejected": -3.0210037231445312, "logps/chosen": -268.4331359863281, "logps/rejected": -175.0969696044922, "loss": 0.822, "rewards/accuracies": 0.625, "rewards/chosen": -0.38932472467422485, "rewards/margins": -0.17867454886436462, "rewards/rejected": -0.21065017580986023, "step": 132 }, { "epoch": 0.02, "learning_rate": 2.999297670607515e-07, "logits/chosen": -2.925034999847412, "logits/rejected": -3.2774596214294434, "logps/chosen": -110.07563781738281, "logps/rejected": -284.17333984375, "loss": 0.4082, "rewards/accuracies": 0.875, "rewards/chosen": 0.1488136649131775, "rewards/margins": 0.8118383884429932, "rewards/rejected": -0.6630247235298157, "step": 133 }, { "epoch": 0.02, "learning_rate": 2.9989465059112725e-07, "logits/chosen": -3.530514717102051, "logits/rejected": -3.268393039703369, "logps/chosen": -346.12030029296875, "logps/rejected": -265.051025390625, "loss": 0.5096, "rewards/accuracies": 0.875, "rewards/chosen": -0.016817137598991394, "rewards/margins": 0.44441425800323486, "rewards/rejected": -0.46123138070106506, "step": 134 }, { "epoch": 0.02, "learning_rate": 2.9985953412150295e-07, "logits/chosen": -3.0413150787353516, "logits/rejected": -3.0458922386169434, "logps/chosen": -438.7294921875, "logps/rejected": -348.6278076171875, "loss": 0.7099, "rewards/accuracies": 0.5, "rewards/chosen": -0.14035530388355255, "rewards/margins": 0.023443520069122314, "rewards/rejected": -0.16379882395267487, "step": 135 }, { "epoch": 0.02, "learning_rate": 2.998244176518787e-07, "logits/chosen": -2.143547534942627, "logits/rejected": -2.2817349433898926, "logps/chosen": -479.7779541015625, "logps/rejected": -485.11834716796875, "loss": 0.7278, "rewards/accuracies": 0.375, "rewards/chosen": -0.17917653918266296, "rewards/margins": 0.040828123688697815, "rewards/rejected": -0.22000466287136078, "step": 136 }, { "epoch": 0.02, "learning_rate": 2.9978930118225446e-07, "logits/chosen": -3.4842469692230225, "logits/rejected": -3.4347009658813477, "logps/chosen": -408.99639892578125, "logps/rejected": -387.94085693359375, "loss": 0.7987, "rewards/accuracies": 0.5, "rewards/chosen": -0.37270495295524597, "rewards/margins": 0.004420414566993713, "rewards/rejected": -0.3771253526210785, "step": 137 }, { "epoch": 0.02, "learning_rate": 2.997541847126302e-07, "logits/chosen": -2.9967212677001953, "logits/rejected": -2.8189125061035156, "logps/chosen": -138.25588989257812, "logps/rejected": -242.4408721923828, "loss": 0.6451, "rewards/accuracies": 0.625, "rewards/chosen": -0.2035536766052246, "rewards/margins": 0.24642862379550934, "rewards/rejected": -0.44998228549957275, "step": 138 }, { "epoch": 0.02, "learning_rate": 2.9971906824300596e-07, "logits/chosen": -3.15767765045166, "logits/rejected": -2.7366042137145996, "logps/chosen": -343.4460754394531, "logps/rejected": -171.1099853515625, "loss": 0.6915, "rewards/accuracies": 0.5, "rewards/chosen": -0.23290085792541504, "rewards/margins": 0.053667664527893066, "rewards/rejected": -0.2865685224533081, "step": 139 }, { "epoch": 0.02, "learning_rate": 2.996839517733817e-07, "logits/chosen": -2.623671054840088, "logits/rejected": -2.845733165740967, "logps/chosen": -302.492919921875, "logps/rejected": -320.3753662109375, "loss": 0.5069, "rewards/accuracies": 0.75, "rewards/chosen": -0.12305383384227753, "rewards/margins": 0.5965790152549744, "rewards/rejected": -0.7196328043937683, "step": 140 }, { "epoch": 0.02, "learning_rate": 2.996488353037574e-07, "logits/chosen": -3.285778284072876, "logits/rejected": -2.98270583152771, "logps/chosen": -287.9363708496094, "logps/rejected": -228.86444091796875, "loss": 0.7223, "rewards/accuracies": 0.5, "rewards/chosen": -0.22906330227851868, "rewards/margins": -0.010491617023944855, "rewards/rejected": -0.2185717076063156, "step": 141 }, { "epoch": 0.02, "learning_rate": 2.996137188341332e-07, "logits/chosen": -3.3022141456604004, "logits/rejected": -3.2671098709106445, "logps/chosen": -481.6185302734375, "logps/rejected": -277.2169189453125, "loss": 0.6677, "rewards/accuracies": 0.75, "rewards/chosen": -0.21497273445129395, "rewards/margins": 0.12409339845180511, "rewards/rejected": -0.33906614780426025, "step": 142 }, { "epoch": 0.02, "learning_rate": 2.9957860236450893e-07, "logits/chosen": -3.365952491760254, "logits/rejected": -3.150855779647827, "logps/chosen": -415.9041748046875, "logps/rejected": -344.7142333984375, "loss": 0.5989, "rewards/accuracies": 0.875, "rewards/chosen": -0.24185144901275635, "rewards/margins": 0.3635188639163971, "rewards/rejected": -0.6053703427314758, "step": 143 }, { "epoch": 0.02, "learning_rate": 2.995434858948847e-07, "logits/chosen": -3.2255189418792725, "logits/rejected": -3.396691083908081, "logps/chosen": -189.93124389648438, "logps/rejected": -199.94618225097656, "loss": 0.5606, "rewards/accuracies": 0.75, "rewards/chosen": 0.045084331184625626, "rewards/margins": 0.6578375101089478, "rewards/rejected": -0.61275315284729, "step": 144 }, { "epoch": 0.02, "learning_rate": 2.9950836942526043e-07, "logits/chosen": -2.579957962036133, "logits/rejected": -2.634092330932617, "logps/chosen": -385.6375427246094, "logps/rejected": -341.6743469238281, "loss": 0.5896, "rewards/accuracies": 0.875, "rewards/chosen": 0.04191902279853821, "rewards/margins": 0.2284519076347351, "rewards/rejected": -0.1865328848361969, "step": 145 }, { "epoch": 0.02, "learning_rate": 2.994732529556362e-07, "logits/chosen": -3.058870553970337, "logits/rejected": -3.120302438735962, "logps/chosen": -279.54766845703125, "logps/rejected": -271.9564514160156, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": -0.28896427154541016, "rewards/margins": 0.07057391107082367, "rewards/rejected": -0.359538197517395, "step": 146 }, { "epoch": 0.02, "learning_rate": 2.9943813648601194e-07, "logits/chosen": -2.701864719390869, "logits/rejected": -2.6773757934570312, "logps/chosen": -287.5202941894531, "logps/rejected": -180.57945251464844, "loss": 0.756, "rewards/accuracies": 0.625, "rewards/chosen": -0.4529024064540863, "rewards/margins": 0.08259911835193634, "rewards/rejected": -0.5355014801025391, "step": 147 }, { "epoch": 0.02, "learning_rate": 2.994030200163877e-07, "logits/chosen": -3.3984477519989014, "logits/rejected": -3.2735342979431152, "logps/chosen": -209.30294799804688, "logps/rejected": -250.28787231445312, "loss": 0.7034, "rewards/accuracies": 0.5, "rewards/chosen": -0.2591944932937622, "rewards/margins": 0.18426847457885742, "rewards/rejected": -0.44346296787261963, "step": 148 }, { "epoch": 0.02, "learning_rate": 2.993679035467634e-07, "logits/chosen": -3.5456769466400146, "logits/rejected": -3.5502374172210693, "logps/chosen": -228.46807861328125, "logps/rejected": -195.2511749267578, "loss": 0.6286, "rewards/accuracies": 0.625, "rewards/chosen": -0.33787602186203003, "rewards/margins": 0.42107442021369934, "rewards/rejected": -0.7589504718780518, "step": 149 }, { "epoch": 0.02, "learning_rate": 2.9933278707713915e-07, "logits/chosen": -4.046043872833252, "logits/rejected": -3.7114052772521973, "logps/chosen": -249.51197814941406, "logps/rejected": -138.1561279296875, "loss": 0.5391, "rewards/accuracies": 0.75, "rewards/chosen": -0.08359494060277939, "rewards/margins": 0.51688551902771, "rewards/rejected": -0.6004804372787476, "step": 150 }, { "epoch": 0.02, "learning_rate": 2.992976706075149e-07, "logits/chosen": -3.3404526710510254, "logits/rejected": -3.208498001098633, "logps/chosen": -243.32574462890625, "logps/rejected": -231.23641967773438, "loss": 0.4954, "rewards/accuracies": 0.875, "rewards/chosen": -0.10667084157466888, "rewards/margins": 0.6393780708312988, "rewards/rejected": -0.7460489273071289, "step": 151 }, { "epoch": 0.02, "learning_rate": 2.9926255413789066e-07, "logits/chosen": -2.9796719551086426, "logits/rejected": -2.851384401321411, "logps/chosen": -198.4134521484375, "logps/rejected": -189.6981201171875, "loss": 0.802, "rewards/accuracies": 0.375, "rewards/chosen": -0.24949032068252563, "rewards/margins": -0.08734472095966339, "rewards/rejected": -0.16214559972286224, "step": 152 }, { "epoch": 0.02, "learning_rate": 2.992274376682664e-07, "logits/chosen": -3.3185057640075684, "logits/rejected": -3.5422091484069824, "logps/chosen": -340.01898193359375, "logps/rejected": -222.56600952148438, "loss": 0.6495, "rewards/accuracies": 0.5, "rewards/chosen": -0.2793940603733063, "rewards/margins": 0.3967933654785156, "rewards/rejected": -0.6761875152587891, "step": 153 }, { "epoch": 0.02, "learning_rate": 2.991923211986421e-07, "logits/chosen": -2.9152514934539795, "logits/rejected": -2.8761844635009766, "logps/chosen": -149.1962432861328, "logps/rejected": -339.2282409667969, "loss": 0.7615, "rewards/accuracies": 0.75, "rewards/chosen": -0.28893014788627625, "rewards/margins": 0.13765370845794678, "rewards/rejected": -0.426583856344223, "step": 154 }, { "epoch": 0.02, "learning_rate": 2.991572047290179e-07, "logits/chosen": -2.5073742866516113, "logits/rejected": -2.8334412574768066, "logps/chosen": -396.4125061035156, "logps/rejected": -323.62701416015625, "loss": 0.4225, "rewards/accuracies": 0.75, "rewards/chosen": 0.34479600191116333, "rewards/margins": 0.8884700536727905, "rewards/rejected": -0.543674111366272, "step": 155 }, { "epoch": 0.02, "learning_rate": 2.991220882593937e-07, "logits/chosen": -3.331512451171875, "logits/rejected": -3.384878635406494, "logps/chosen": -124.75603485107422, "logps/rejected": -154.62310791015625, "loss": 0.594, "rewards/accuracies": 0.75, "rewards/chosen": -0.10098839551210403, "rewards/margins": 0.2894188463687897, "rewards/rejected": -0.3904072642326355, "step": 156 }, { "epoch": 0.02, "learning_rate": 2.990869717897694e-07, "logits/chosen": -3.347975730895996, "logits/rejected": -3.1921215057373047, "logps/chosen": -257.6455993652344, "logps/rejected": -203.8451690673828, "loss": 0.5977, "rewards/accuracies": 0.5, "rewards/chosen": -0.07259725034236908, "rewards/margins": 0.32492148876190186, "rewards/rejected": -0.39751872420310974, "step": 157 }, { "epoch": 0.02, "learning_rate": 2.9905185532014513e-07, "logits/chosen": -3.3827998638153076, "logits/rejected": -3.5492677688598633, "logps/chosen": -328.07012939453125, "logps/rejected": -292.88995361328125, "loss": 0.5515, "rewards/accuracies": 0.75, "rewards/chosen": -0.28669944405555725, "rewards/margins": 0.6278131008148193, "rewards/rejected": -0.9145126342773438, "step": 158 }, { "epoch": 0.02, "learning_rate": 2.990167388505209e-07, "logits/chosen": -3.4158194065093994, "logits/rejected": -3.3485889434814453, "logps/chosen": -183.23655700683594, "logps/rejected": -205.1849822998047, "loss": 0.579, "rewards/accuracies": 0.625, "rewards/chosen": -0.4549804925918579, "rewards/margins": 0.5344358682632446, "rewards/rejected": -0.9894163608551025, "step": 159 }, { "epoch": 0.02, "learning_rate": 2.9898162238089664e-07, "logits/chosen": -2.8854222297668457, "logits/rejected": -2.7342538833618164, "logps/chosen": -434.2591552734375, "logps/rejected": -436.9725341796875, "loss": 0.5369, "rewards/accuracies": 0.625, "rewards/chosen": -0.12497800588607788, "rewards/margins": 0.5749577283859253, "rewards/rejected": -0.6999356746673584, "step": 160 }, { "epoch": 0.02, "learning_rate": 2.989465059112724e-07, "logits/chosen": -3.999845504760742, "logits/rejected": -3.616804838180542, "logps/chosen": -329.3415222167969, "logps/rejected": -229.0399932861328, "loss": 0.5533, "rewards/accuracies": 0.75, "rewards/chosen": -0.10923422873020172, "rewards/margins": 0.5819433927536011, "rewards/rejected": -0.6911776661872864, "step": 161 }, { "epoch": 0.02, "learning_rate": 2.989113894416481e-07, "logits/chosen": -3.4872474670410156, "logits/rejected": -3.2849693298339844, "logps/chosen": -377.61309814453125, "logps/rejected": -399.02423095703125, "loss": 0.7418, "rewards/accuracies": 0.25, "rewards/chosen": -0.36903852224349976, "rewards/margins": 0.02642093598842621, "rewards/rejected": -0.3954594135284424, "step": 162 }, { "epoch": 0.02, "learning_rate": 2.9887627297202385e-07, "logits/chosen": -3.478287696838379, "logits/rejected": -3.205300807952881, "logps/chosen": -224.24118041992188, "logps/rejected": -94.39404296875, "loss": 0.6099, "rewards/accuracies": 0.625, "rewards/chosen": 0.0231582410633564, "rewards/margins": 0.26386886835098267, "rewards/rejected": -0.24071064591407776, "step": 163 }, { "epoch": 0.02, "learning_rate": 2.9884115650239965e-07, "logits/chosen": -2.5553925037384033, "logits/rejected": -2.506513833999634, "logps/chosen": -499.73187255859375, "logps/rejected": -322.5975036621094, "loss": 0.954, "rewards/accuracies": 0.625, "rewards/chosen": -0.7257604598999023, "rewards/margins": -0.0986221432685852, "rewards/rejected": -0.6271383166313171, "step": 164 }, { "epoch": 0.02, "learning_rate": 2.9880604003277535e-07, "logits/chosen": -2.6206936836242676, "logits/rejected": -2.769092559814453, "logps/chosen": -230.21005249023438, "logps/rejected": -201.02163696289062, "loss": 0.7004, "rewards/accuracies": 0.5, "rewards/chosen": -0.2516683340072632, "rewards/margins": 0.08148515224456787, "rewards/rejected": -0.33315351605415344, "step": 165 }, { "epoch": 0.02, "learning_rate": 2.987709235631511e-07, "logits/chosen": -3.632220506668091, "logits/rejected": -3.4407827854156494, "logps/chosen": -433.216064453125, "logps/rejected": -258.58099365234375, "loss": 0.5777, "rewards/accuracies": 0.75, "rewards/chosen": -0.026065528392791748, "rewards/margins": 0.3912695646286011, "rewards/rejected": -0.4173351526260376, "step": 166 }, { "epoch": 0.02, "learning_rate": 2.9873580709352686e-07, "logits/chosen": -2.7458789348602295, "logits/rejected": -2.957353115081787, "logps/chosen": -162.34446716308594, "logps/rejected": -347.2967224121094, "loss": 0.4975, "rewards/accuracies": 0.75, "rewards/chosen": -0.05099650099873543, "rewards/margins": 0.6179355382919312, "rewards/rejected": -0.6689320206642151, "step": 167 }, { "epoch": 0.02, "learning_rate": 2.987006906239026e-07, "logits/chosen": -2.4787118434906006, "logits/rejected": -2.534574508666992, "logps/chosen": -278.05120849609375, "logps/rejected": -202.2365264892578, "loss": 0.7327, "rewards/accuracies": 0.625, "rewards/chosen": -0.29210782051086426, "rewards/margins": -0.017836904153227806, "rewards/rejected": -0.2742709219455719, "step": 168 }, { "epoch": 0.02, "learning_rate": 2.9866557415427837e-07, "logits/chosen": -2.821833610534668, "logits/rejected": -3.0649807453155518, "logps/chosen": -202.0626220703125, "logps/rejected": -204.60374450683594, "loss": 0.7851, "rewards/accuracies": 0.625, "rewards/chosen": -0.2640414834022522, "rewards/margins": -0.12227870523929596, "rewards/rejected": -0.14176276326179504, "step": 169 }, { "epoch": 0.02, "learning_rate": 2.9863045768465407e-07, "logits/chosen": -3.2463502883911133, "logits/rejected": -3.2087459564208984, "logps/chosen": -230.4561767578125, "logps/rejected": -272.46490478515625, "loss": 0.7687, "rewards/accuracies": 0.5, "rewards/chosen": -0.20005351305007935, "rewards/margins": 0.015266001224517822, "rewards/rejected": -0.21531949937343597, "step": 170 }, { "epoch": 0.02, "learning_rate": 2.985953412150298e-07, "logits/chosen": -3.313397169113159, "logits/rejected": -3.26961088180542, "logps/chosen": -233.5687713623047, "logps/rejected": -195.5608673095703, "loss": 0.742, "rewards/accuracies": 0.5, "rewards/chosen": -0.1908952295780182, "rewards/margins": -0.012923330068588257, "rewards/rejected": -0.17797189950942993, "step": 171 }, { "epoch": 0.02, "learning_rate": 2.985602247454056e-07, "logits/chosen": -3.9305286407470703, "logits/rejected": -3.6726322174072266, "logps/chosen": -334.8080749511719, "logps/rejected": -171.98944091796875, "loss": 0.6186, "rewards/accuracies": 0.625, "rewards/chosen": 0.0617518424987793, "rewards/margins": 0.2917241156101227, "rewards/rejected": -0.22997227311134338, "step": 172 }, { "epoch": 0.02, "learning_rate": 2.9852510827578133e-07, "logits/chosen": -2.7501490116119385, "logits/rejected": -3.198286771774292, "logps/chosen": -223.817138671875, "logps/rejected": -332.61468505859375, "loss": 0.5727, "rewards/accuracies": 0.75, "rewards/chosen": -0.42387324571609497, "rewards/margins": 0.6796536445617676, "rewards/rejected": -1.1035268306732178, "step": 173 }, { "epoch": 0.02, "learning_rate": 2.984899918061571e-07, "logits/chosen": -3.8350906372070312, "logits/rejected": -3.5324692726135254, "logps/chosen": -193.56277465820312, "logps/rejected": -148.30596923828125, "loss": 0.4862, "rewards/accuracies": 0.875, "rewards/chosen": 0.2093673050403595, "rewards/margins": 0.7078065872192383, "rewards/rejected": -0.498439222574234, "step": 174 }, { "epoch": 0.02, "learning_rate": 2.984548753365328e-07, "logits/chosen": -2.9773130416870117, "logits/rejected": -3.1218841075897217, "logps/chosen": -195.6688690185547, "logps/rejected": -195.92657470703125, "loss": 0.7823, "rewards/accuracies": 0.5, "rewards/chosen": -0.5283998847007751, "rewards/margins": -0.10887815058231354, "rewards/rejected": -0.4195217490196228, "step": 175 }, { "epoch": 0.02, "learning_rate": 2.984197588669086e-07, "logits/chosen": -2.9050984382629395, "logits/rejected": -3.1372880935668945, "logps/chosen": -170.55767822265625, "logps/rejected": -185.3782958984375, "loss": 0.5555, "rewards/accuracies": 0.75, "rewards/chosen": 0.06616852432489395, "rewards/margins": 0.45835667848587036, "rewards/rejected": -0.3921881914138794, "step": 176 }, { "epoch": 0.02, "learning_rate": 2.9838464239728435e-07, "logits/chosen": -3.0023117065429688, "logits/rejected": -3.1521849632263184, "logps/chosen": -385.71356201171875, "logps/rejected": -226.0330352783203, "loss": 0.5664, "rewards/accuracies": 0.75, "rewards/chosen": 0.3155870735645294, "rewards/margins": 0.32975566387176514, "rewards/rejected": -0.014168601483106613, "step": 177 }, { "epoch": 0.02, "learning_rate": 2.9834952592766005e-07, "logits/chosen": -2.600144863128662, "logits/rejected": -2.405885934829712, "logps/chosen": -137.55184936523438, "logps/rejected": -156.10321044921875, "loss": 0.7071, "rewards/accuracies": 0.625, "rewards/chosen": -0.19570621848106384, "rewards/margins": 0.07889263331890106, "rewards/rejected": -0.2745988667011261, "step": 178 }, { "epoch": 0.02, "learning_rate": 2.983144094580358e-07, "logits/chosen": -3.4724783897399902, "logits/rejected": -3.27103853225708, "logps/chosen": -340.7546081542969, "logps/rejected": -267.78375244140625, "loss": 0.3968, "rewards/accuracies": 0.875, "rewards/chosen": -0.017360538244247437, "rewards/margins": 0.9971700310707092, "rewards/rejected": -1.0145305395126343, "step": 179 }, { "epoch": 0.02, "learning_rate": 2.9827929298841155e-07, "logits/chosen": -2.8327319622039795, "logits/rejected": -2.883878707885742, "logps/chosen": -225.36587524414062, "logps/rejected": -267.1601257324219, "loss": 0.5921, "rewards/accuracies": 0.75, "rewards/chosen": -0.1742541491985321, "rewards/margins": 0.49065008759498596, "rewards/rejected": -0.6649041771888733, "step": 180 }, { "epoch": 0.02, "learning_rate": 2.982441765187873e-07, "logits/chosen": -2.311443567276001, "logits/rejected": -2.3403432369232178, "logps/chosen": -291.8216247558594, "logps/rejected": -233.33709716796875, "loss": 0.6999, "rewards/accuracies": 0.625, "rewards/chosen": -0.2790372371673584, "rewards/margins": 0.11873116344213486, "rewards/rejected": -0.39776840806007385, "step": 181 }, { "epoch": 0.02, "learning_rate": 2.9820906004916306e-07, "logits/chosen": -2.5439298152923584, "logits/rejected": -2.7971789836883545, "logps/chosen": -379.8512878417969, "logps/rejected": -301.1816711425781, "loss": 0.4132, "rewards/accuracies": 0.875, "rewards/chosen": 0.0018820781260728836, "rewards/margins": 0.8097530603408813, "rewards/rejected": -0.8078709840774536, "step": 182 }, { "epoch": 0.02, "learning_rate": 2.9817394357953876e-07, "logits/chosen": -2.828860282897949, "logits/rejected": -2.9380154609680176, "logps/chosen": -253.701416015625, "logps/rejected": -313.2247314453125, "loss": 0.6251, "rewards/accuracies": 0.75, "rewards/chosen": -0.17963212728500366, "rewards/margins": 0.28871774673461914, "rewards/rejected": -0.4683498740196228, "step": 183 }, { "epoch": 0.02, "learning_rate": 2.981388271099145e-07, "logits/chosen": -2.8582992553710938, "logits/rejected": -3.032472610473633, "logps/chosen": -471.1768493652344, "logps/rejected": -300.2481994628906, "loss": 0.3948, "rewards/accuracies": 0.875, "rewards/chosen": 0.03532904386520386, "rewards/margins": 0.9726289510726929, "rewards/rejected": -0.937299907207489, "step": 184 }, { "epoch": 0.02, "learning_rate": 2.9810371064029027e-07, "logits/chosen": -3.062253952026367, "logits/rejected": -2.9814083576202393, "logps/chosen": -287.1173095703125, "logps/rejected": -239.99176025390625, "loss": 0.5691, "rewards/accuracies": 0.625, "rewards/chosen": -0.12003950774669647, "rewards/margins": 0.42535489797592163, "rewards/rejected": -0.5453944206237793, "step": 185 }, { "epoch": 0.02, "learning_rate": 2.98068594170666e-07, "logits/chosen": -3.4551150798797607, "logits/rejected": -3.4497079849243164, "logps/chosen": -192.80047607421875, "logps/rejected": -197.1517333984375, "loss": 0.4895, "rewards/accuracies": 0.75, "rewards/chosen": -0.10274100303649902, "rewards/margins": 0.589533805847168, "rewards/rejected": -0.692274808883667, "step": 186 }, { "epoch": 0.02, "learning_rate": 2.980334777010418e-07, "logits/chosen": -2.9222331047058105, "logits/rejected": -3.0306241512298584, "logps/chosen": -264.43621826171875, "logps/rejected": -289.7286071777344, "loss": 0.5074, "rewards/accuracies": 0.75, "rewards/chosen": -0.022851087152957916, "rewards/margins": 0.7150350213050842, "rewards/rejected": -0.7378861308097839, "step": 187 }, { "epoch": 0.02, "learning_rate": 2.979983612314175e-07, "logits/chosen": -3.7061309814453125, "logits/rejected": -3.685464382171631, "logps/chosen": -194.2506866455078, "logps/rejected": -299.09600830078125, "loss": 0.5612, "rewards/accuracies": 0.625, "rewards/chosen": -0.08315838873386383, "rewards/margins": 0.5760486125946045, "rewards/rejected": -0.6592070460319519, "step": 188 }, { "epoch": 0.02, "learning_rate": 2.979632447617933e-07, "logits/chosen": -3.250340700149536, "logits/rejected": -3.4634101390838623, "logps/chosen": -240.18511962890625, "logps/rejected": -204.5301971435547, "loss": 0.5157, "rewards/accuracies": 0.75, "rewards/chosen": 0.2084040641784668, "rewards/margins": 0.5127589106559753, "rewards/rejected": -0.30435487627983093, "step": 189 }, { "epoch": 0.02, "learning_rate": 2.9792812829216904e-07, "logits/chosen": -2.8980226516723633, "logits/rejected": -2.909475803375244, "logps/chosen": -265.8323669433594, "logps/rejected": -297.021728515625, "loss": 0.5685, "rewards/accuracies": 0.75, "rewards/chosen": -0.22862929105758667, "rewards/margins": 0.45408862829208374, "rewards/rejected": -0.6827179193496704, "step": 190 }, { "epoch": 0.02, "learning_rate": 2.9789301182254474e-07, "logits/chosen": -3.1988024711608887, "logits/rejected": -3.325883626937866, "logps/chosen": -218.533203125, "logps/rejected": -245.3336944580078, "loss": 0.6795, "rewards/accuracies": 0.5, "rewards/chosen": -0.5096397995948792, "rewards/margins": 0.07023922353982925, "rewards/rejected": -0.5798790454864502, "step": 191 }, { "epoch": 0.02, "learning_rate": 2.978578953529205e-07, "logits/chosen": -3.0551133155822754, "logits/rejected": -3.1477432250976562, "logps/chosen": -233.41189575195312, "logps/rejected": -214.26553344726562, "loss": 0.5762, "rewards/accuracies": 0.75, "rewards/chosen": -0.10441388189792633, "rewards/margins": 0.514127790927887, "rewards/rejected": -0.6185417175292969, "step": 192 }, { "epoch": 0.02, "learning_rate": 2.9782277888329625e-07, "logits/chosen": -3.3801629543304443, "logits/rejected": -3.3786168098449707, "logps/chosen": -371.3843688964844, "logps/rejected": -311.36669921875, "loss": 0.5325, "rewards/accuracies": 0.625, "rewards/chosen": 0.10817471146583557, "rewards/margins": 0.5224473476409912, "rewards/rejected": -0.414272665977478, "step": 193 }, { "epoch": 0.02, "learning_rate": 2.97787662413672e-07, "logits/chosen": -2.8085408210754395, "logits/rejected": -2.8878421783447266, "logps/chosen": -248.19271850585938, "logps/rejected": -379.5545959472656, "loss": 0.4467, "rewards/accuracies": 0.75, "rewards/chosen": 0.06882905215024948, "rewards/margins": 0.9077163934707642, "rewards/rejected": -0.8388873338699341, "step": 194 }, { "epoch": 0.02, "learning_rate": 2.9775254594404776e-07, "logits/chosen": -3.327256679534912, "logits/rejected": -3.314408779144287, "logps/chosen": -310.48468017578125, "logps/rejected": -299.07159423828125, "loss": 0.4931, "rewards/accuracies": 0.75, "rewards/chosen": -0.35162973403930664, "rewards/margins": 0.9348543882369995, "rewards/rejected": -1.2864842414855957, "step": 195 }, { "epoch": 0.02, "learning_rate": 2.9771742947442346e-07, "logits/chosen": -3.902977466583252, "logits/rejected": -3.9194223880767822, "logps/chosen": -159.94650268554688, "logps/rejected": -148.90464782714844, "loss": 0.5699, "rewards/accuracies": 0.625, "rewards/chosen": -0.20235267281532288, "rewards/margins": 0.5211620330810547, "rewards/rejected": -0.7235147356987, "step": 196 }, { "epoch": 0.02, "learning_rate": 2.976823130047992e-07, "logits/chosen": -2.1873645782470703, "logits/rejected": -2.54728102684021, "logps/chosen": -439.40771484375, "logps/rejected": -402.89215087890625, "loss": 0.8621, "rewards/accuracies": 0.5, "rewards/chosen": -0.06044436991214752, "rewards/margins": -0.16255342960357666, "rewards/rejected": 0.10210905969142914, "step": 197 }, { "epoch": 0.02, "learning_rate": 2.97647196535175e-07, "logits/chosen": -2.756392478942871, "logits/rejected": -2.571483612060547, "logps/chosen": -199.18685913085938, "logps/rejected": -262.017578125, "loss": 0.8194, "rewards/accuracies": 0.625, "rewards/chosen": -0.36614125967025757, "rewards/margins": -0.0005308240652084351, "rewards/rejected": -0.36561042070388794, "step": 198 }, { "epoch": 0.02, "learning_rate": 2.976120800655507e-07, "logits/chosen": -2.8051319122314453, "logits/rejected": -3.2325615882873535, "logps/chosen": -197.07574462890625, "logps/rejected": -246.64891052246094, "loss": 0.3894, "rewards/accuracies": 1.0, "rewards/chosen": 0.3187968134880066, "rewards/margins": 1.0560178756713867, "rewards/rejected": -0.7372210025787354, "step": 199 }, { "epoch": 0.02, "learning_rate": 2.9757696359592647e-07, "logits/chosen": -3.391042470932007, "logits/rejected": -3.23837947845459, "logps/chosen": -282.110595703125, "logps/rejected": -233.5883331298828, "loss": 0.6438, "rewards/accuracies": 0.75, "rewards/chosen": -0.47287696599960327, "rewards/margins": 0.2671605050563812, "rewards/rejected": -0.7400375008583069, "step": 200 }, { "epoch": 0.02, "learning_rate": 2.9754184712630223e-07, "logits/chosen": -3.0555269718170166, "logits/rejected": -2.8955140113830566, "logps/chosen": -215.63636779785156, "logps/rejected": -219.0470428466797, "loss": 0.3911, "rewards/accuracies": 1.0, "rewards/chosen": -0.08939912170171738, "rewards/margins": 0.9251919984817505, "rewards/rejected": -1.014591097831726, "step": 201 }, { "epoch": 0.02, "learning_rate": 2.97506730656678e-07, "logits/chosen": -3.0892515182495117, "logits/rejected": -3.2398135662078857, "logps/chosen": -260.987060546875, "logps/rejected": -316.5274353027344, "loss": 0.5312, "rewards/accuracies": 0.625, "rewards/chosen": -0.023985620588064194, "rewards/margins": 0.5565420985221863, "rewards/rejected": -0.580527663230896, "step": 202 }, { "epoch": 0.02, "learning_rate": 2.9747161418705373e-07, "logits/chosen": -2.6565470695495605, "logits/rejected": -2.48661208152771, "logps/chosen": -158.07730102539062, "logps/rejected": -314.88397216796875, "loss": 0.4125, "rewards/accuracies": 0.875, "rewards/chosen": -0.015331745147705078, "rewards/margins": 0.9793165922164917, "rewards/rejected": -0.994648277759552, "step": 203 }, { "epoch": 0.02, "learning_rate": 2.9743649771742944e-07, "logits/chosen": -3.140512466430664, "logits/rejected": -3.50887393951416, "logps/chosen": -116.6439208984375, "logps/rejected": -185.6629638671875, "loss": 0.4969, "rewards/accuracies": 0.75, "rewards/chosen": 0.09205470234155655, "rewards/margins": 0.7980520129203796, "rewards/rejected": -0.7059973478317261, "step": 204 }, { "epoch": 0.02, "learning_rate": 2.974013812478052e-07, "logits/chosen": -2.906808853149414, "logits/rejected": -3.275259256362915, "logps/chosen": -138.61817932128906, "logps/rejected": -273.44415283203125, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": -0.023972608149051666, "rewards/margins": 1.4868533611297607, "rewards/rejected": -1.5108258724212646, "step": 205 }, { "epoch": 0.02, "learning_rate": 2.9736626477818094e-07, "logits/chosen": -2.807100296020508, "logits/rejected": -2.839050769805908, "logps/chosen": -307.84112548828125, "logps/rejected": -267.107666015625, "loss": 0.6733, "rewards/accuracies": 0.625, "rewards/chosen": -0.42347633838653564, "rewards/margins": 0.1767052412033081, "rewards/rejected": -0.6001815795898438, "step": 206 }, { "epoch": 0.02, "learning_rate": 2.973311483085567e-07, "logits/chosen": -2.9292707443237305, "logits/rejected": -2.70388126373291, "logps/chosen": -258.6240234375, "logps/rejected": -243.3085479736328, "loss": 0.5161, "rewards/accuracies": 0.75, "rewards/chosen": 0.16219741106033325, "rewards/margins": 0.488385945558548, "rewards/rejected": -0.3261885344982147, "step": 207 }, { "epoch": 0.02, "learning_rate": 2.9729603183893245e-07, "logits/chosen": -3.6700987815856934, "logits/rejected": -3.612215042114258, "logps/chosen": -294.9308776855469, "logps/rejected": -310.5212707519531, "loss": 0.6661, "rewards/accuracies": 0.5, "rewards/chosen": -0.5405645966529846, "rewards/margins": 0.21268552541732788, "rewards/rejected": -0.7532501220703125, "step": 208 }, { "epoch": 0.02, "learning_rate": 2.972609153693082e-07, "logits/chosen": -3.4860682487487793, "logits/rejected": -3.761573553085327, "logps/chosen": -219.30694580078125, "logps/rejected": -250.24530029296875, "loss": 0.4722, "rewards/accuracies": 0.625, "rewards/chosen": -0.038902655243873596, "rewards/margins": 0.6632366180419922, "rewards/rejected": -0.7021392583847046, "step": 209 }, { "epoch": 0.02, "learning_rate": 2.9722579889968396e-07, "logits/chosen": -2.762735605239868, "logits/rejected": -2.594841480255127, "logps/chosen": -255.82757568359375, "logps/rejected": -148.57679748535156, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": 0.11014002561569214, "rewards/margins": 0.6784398555755615, "rewards/rejected": -0.5682997703552246, "step": 210 }, { "epoch": 0.02, "learning_rate": 2.971906824300597e-07, "logits/chosen": -3.3610377311706543, "logits/rejected": -3.216486692428589, "logps/chosen": -199.43492126464844, "logps/rejected": -169.95294189453125, "loss": 0.4486, "rewards/accuracies": 1.0, "rewards/chosen": 0.14022159576416016, "rewards/margins": 0.6015288233757019, "rewards/rejected": -0.46130722761154175, "step": 211 }, { "epoch": 0.02, "learning_rate": 2.971555659604354e-07, "logits/chosen": -3.5222673416137695, "logits/rejected": -3.2268295288085938, "logps/chosen": -161.68572998046875, "logps/rejected": -110.05447387695312, "loss": 0.8698, "rewards/accuracies": 0.375, "rewards/chosen": -0.5529178380966187, "rewards/margins": -0.2131844162940979, "rewards/rejected": -0.33973339200019836, "step": 212 }, { "epoch": 0.02, "learning_rate": 2.9712044949081117e-07, "logits/chosen": -3.728032350540161, "logits/rejected": -3.4043331146240234, "logps/chosen": -208.7862091064453, "logps/rejected": -179.4081268310547, "loss": 0.6808, "rewards/accuracies": 0.625, "rewards/chosen": -0.28625184297561646, "rewards/margins": 0.17475757002830505, "rewards/rejected": -0.4610094428062439, "step": 213 }, { "epoch": 0.02, "learning_rate": 2.970853330211869e-07, "logits/chosen": -2.6386070251464844, "logits/rejected": -2.7980525493621826, "logps/chosen": -516.9013671875, "logps/rejected": -268.7263488769531, "loss": 0.7721, "rewards/accuracies": 0.625, "rewards/chosen": -0.15232467651367188, "rewards/margins": -0.07989473640918732, "rewards/rejected": -0.07242995500564575, "step": 214 }, { "epoch": 0.02, "learning_rate": 2.970502165515627e-07, "logits/chosen": -3.597278118133545, "logits/rejected": -3.415316343307495, "logps/chosen": -290.1488037109375, "logps/rejected": -283.62860107421875, "loss": 0.5235, "rewards/accuracies": 0.625, "rewards/chosen": -0.08039014041423798, "rewards/margins": 0.5221853256225586, "rewards/rejected": -0.602575421333313, "step": 215 }, { "epoch": 0.02, "learning_rate": 2.9701510008193843e-07, "logits/chosen": -3.453481912612915, "logits/rejected": -3.2548611164093018, "logps/chosen": -236.84219360351562, "logps/rejected": -185.232421875, "loss": 0.6122, "rewards/accuracies": 0.625, "rewards/chosen": -0.0571928545832634, "rewards/margins": 0.2992919683456421, "rewards/rejected": -0.3564848303794861, "step": 216 }, { "epoch": 0.03, "learning_rate": 2.969799836123142e-07, "logits/chosen": -3.678865432739258, "logits/rejected": -3.0691254138946533, "logps/chosen": -281.11480712890625, "logps/rejected": -298.4317932128906, "loss": 0.5655, "rewards/accuracies": 0.75, "rewards/chosen": -0.013596925884485245, "rewards/margins": 0.5251922607421875, "rewards/rejected": -0.5387891530990601, "step": 217 }, { "epoch": 0.03, "learning_rate": 2.969448671426899e-07, "logits/chosen": -2.94370698928833, "logits/rejected": -3.2599921226501465, "logps/chosen": -271.5096740722656, "logps/rejected": -298.1865539550781, "loss": 0.9749, "rewards/accuracies": 0.5, "rewards/chosen": -0.46265023946762085, "rewards/margins": 0.10639116168022156, "rewards/rejected": -0.5690414905548096, "step": 218 }, { "epoch": 0.03, "learning_rate": 2.9690975067306564e-07, "logits/chosen": -2.6451072692871094, "logits/rejected": -2.4324116706848145, "logps/chosen": -210.8350067138672, "logps/rejected": -322.6939697265625, "loss": 0.6167, "rewards/accuracies": 0.625, "rewards/chosen": -0.2769347131252289, "rewards/margins": 0.44237735867500305, "rewards/rejected": -0.7193120718002319, "step": 219 }, { "epoch": 0.03, "learning_rate": 2.968746342034414e-07, "logits/chosen": -3.206939458847046, "logits/rejected": -3.2500791549682617, "logps/chosen": -226.1312255859375, "logps/rejected": -276.7279357910156, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.2716180384159088, "rewards/margins": 0.07935592532157898, "rewards/rejected": -0.3509739637374878, "step": 220 }, { "epoch": 0.03, "learning_rate": 2.9683951773381715e-07, "logits/chosen": -3.516022205352783, "logits/rejected": -3.3892829418182373, "logps/chosen": -245.95211791992188, "logps/rejected": -190.95635986328125, "loss": 0.5184, "rewards/accuracies": 1.0, "rewards/chosen": 0.09167724847793579, "rewards/margins": 0.4101697504520416, "rewards/rejected": -0.31849250197410583, "step": 221 }, { "epoch": 0.03, "learning_rate": 2.968044012641929e-07, "logits/chosen": -2.7190985679626465, "logits/rejected": -2.912688732147217, "logps/chosen": -76.05853271484375, "logps/rejected": -322.3135070800781, "loss": 0.6346, "rewards/accuracies": 0.875, "rewards/chosen": -0.153318852186203, "rewards/margins": 0.22015169262886047, "rewards/rejected": -0.3734705448150635, "step": 222 }, { "epoch": 0.03, "learning_rate": 2.9676928479456865e-07, "logits/chosen": -2.8298537731170654, "logits/rejected": -2.6934783458709717, "logps/chosen": -300.29364013671875, "logps/rejected": -308.415771484375, "loss": 0.5984, "rewards/accuracies": 0.5, "rewards/chosen": -0.28014540672302246, "rewards/margins": 0.6264234781265259, "rewards/rejected": -0.9065688848495483, "step": 223 }, { "epoch": 0.03, "learning_rate": 2.967341683249444e-07, "logits/chosen": -3.0922107696533203, "logits/rejected": -2.8710861206054688, "logps/chosen": -326.006103515625, "logps/rejected": -281.7949523925781, "loss": 0.6966, "rewards/accuracies": 0.5, "rewards/chosen": -0.4827280044555664, "rewards/margins": 0.3587973713874817, "rewards/rejected": -0.8415253162384033, "step": 224 }, { "epoch": 0.03, "learning_rate": 2.9669905185532016e-07, "logits/chosen": -2.3920328617095947, "logits/rejected": -2.381572723388672, "logps/chosen": -320.2467346191406, "logps/rejected": -273.5317077636719, "loss": 0.8224, "rewards/accuracies": 0.25, "rewards/chosen": -0.3464587330818176, "rewards/margins": -0.1277146190404892, "rewards/rejected": -0.21874408423900604, "step": 225 }, { "epoch": 0.03, "learning_rate": 2.9666393538569586e-07, "logits/chosen": -3.5929512977600098, "logits/rejected": -3.31587815284729, "logps/chosen": -214.7141876220703, "logps/rejected": -184.9407196044922, "loss": 0.6134, "rewards/accuracies": 0.5, "rewards/chosen": -0.2481451779603958, "rewards/margins": 0.21901190280914307, "rewards/rejected": -0.4671570658683777, "step": 226 }, { "epoch": 0.03, "learning_rate": 2.966288189160716e-07, "logits/chosen": -2.9478559494018555, "logits/rejected": -2.8069493770599365, "logps/chosen": -203.62388610839844, "logps/rejected": -189.1769256591797, "loss": 0.5301, "rewards/accuracies": 0.75, "rewards/chosen": -0.07000663876533508, "rewards/margins": 0.5580227971076965, "rewards/rejected": -0.6280293464660645, "step": 227 }, { "epoch": 0.03, "learning_rate": 2.9659370244644737e-07, "logits/chosen": -2.190747022628784, "logits/rejected": -2.487834930419922, "logps/chosen": -381.3501281738281, "logps/rejected": -243.27737426757812, "loss": 0.4856, "rewards/accuracies": 0.875, "rewards/chosen": 0.004550546407699585, "rewards/margins": 0.6609116792678833, "rewards/rejected": -0.6563611030578613, "step": 228 }, { "epoch": 0.03, "learning_rate": 2.965585859768231e-07, "logits/chosen": -3.680727481842041, "logits/rejected": -3.666208267211914, "logps/chosen": -222.2845916748047, "logps/rejected": -314.9975891113281, "loss": 0.4674, "rewards/accuracies": 0.75, "rewards/chosen": -0.030049512162804604, "rewards/margins": 0.9947059154510498, "rewards/rejected": -1.0247553586959839, "step": 229 }, { "epoch": 0.03, "learning_rate": 2.965234695071989e-07, "logits/chosen": -3.4629735946655273, "logits/rejected": -3.31046724319458, "logps/chosen": -221.97340393066406, "logps/rejected": -126.62712097167969, "loss": 0.8937, "rewards/accuracies": 0.5, "rewards/chosen": -0.2501615285873413, "rewards/margins": 0.16573986411094666, "rewards/rejected": -0.4159013032913208, "step": 230 }, { "epoch": 0.03, "learning_rate": 2.964883530375746e-07, "logits/chosen": -3.419600009918213, "logits/rejected": -3.4507951736450195, "logps/chosen": -229.01889038085938, "logps/rejected": -358.0796203613281, "loss": 0.4654, "rewards/accuracies": 0.875, "rewards/chosen": -0.3408559262752533, "rewards/margins": 0.7648007273674011, "rewards/rejected": -1.105656623840332, "step": 231 }, { "epoch": 0.03, "learning_rate": 2.964532365679504e-07, "logits/chosen": -3.765226364135742, "logits/rejected": -3.8680765628814697, "logps/chosen": -188.7469482421875, "logps/rejected": -184.66305541992188, "loss": 0.5588, "rewards/accuracies": 0.75, "rewards/chosen": -0.1464422643184662, "rewards/margins": 0.5799602270126343, "rewards/rejected": -0.7264024615287781, "step": 232 }, { "epoch": 0.03, "learning_rate": 2.964181200983261e-07, "logits/chosen": -2.745063304901123, "logits/rejected": -2.8669588565826416, "logps/chosen": -286.19873046875, "logps/rejected": -265.67486572265625, "loss": 0.5542, "rewards/accuracies": 0.625, "rewards/chosen": -0.05369701236486435, "rewards/margins": 0.5675129890441895, "rewards/rejected": -0.6212100386619568, "step": 233 }, { "epoch": 0.03, "learning_rate": 2.9638300362870184e-07, "logits/chosen": -2.836422920227051, "logits/rejected": -2.7854535579681396, "logps/chosen": -287.45306396484375, "logps/rejected": -154.11834716796875, "loss": 0.5566, "rewards/accuracies": 0.625, "rewards/chosen": -0.1475120484828949, "rewards/margins": 0.548670768737793, "rewards/rejected": -0.6961827874183655, "step": 234 }, { "epoch": 0.03, "learning_rate": 2.963478871590776e-07, "logits/chosen": -3.0314126014709473, "logits/rejected": -3.105581283569336, "logps/chosen": -291.4547424316406, "logps/rejected": -380.5617370605469, "loss": 0.2724, "rewards/accuracies": 1.0, "rewards/chosen": 0.11533575505018234, "rewards/margins": 1.307447075843811, "rewards/rejected": -1.1921114921569824, "step": 235 }, { "epoch": 0.03, "learning_rate": 2.9631277068945335e-07, "logits/chosen": -3.2220299243927, "logits/rejected": -3.0955076217651367, "logps/chosen": -371.5953063964844, "logps/rejected": -283.3895568847656, "loss": 0.5466, "rewards/accuracies": 0.75, "rewards/chosen": -0.1008606031537056, "rewards/margins": 0.5603925585746765, "rewards/rejected": -0.6612531542778015, "step": 236 }, { "epoch": 0.03, "learning_rate": 2.962776542198291e-07, "logits/chosen": -3.0689451694488525, "logits/rejected": -3.151905059814453, "logps/chosen": -162.16529846191406, "logps/rejected": -269.0153503417969, "loss": 0.5308, "rewards/accuracies": 0.75, "rewards/chosen": -0.14437344670295715, "rewards/margins": 0.4573960304260254, "rewards/rejected": -0.6017694473266602, "step": 237 }, { "epoch": 0.03, "learning_rate": 2.9624253775020485e-07, "logits/chosen": -2.5645570755004883, "logits/rejected": -2.456944465637207, "logps/chosen": -430.4282531738281, "logps/rejected": -340.0616760253906, "loss": 0.5597, "rewards/accuracies": 0.5, "rewards/chosen": -0.2560270428657532, "rewards/margins": 0.7348439693450928, "rewards/rejected": -0.9908709526062012, "step": 238 }, { "epoch": 0.03, "learning_rate": 2.9620742128058056e-07, "logits/chosen": -2.7330174446105957, "logits/rejected": -2.7327356338500977, "logps/chosen": -277.7727355957031, "logps/rejected": -228.38668823242188, "loss": 0.5331, "rewards/accuracies": 0.75, "rewards/chosen": -0.19206827878952026, "rewards/margins": 0.5544301271438599, "rewards/rejected": -0.7464984655380249, "step": 239 }, { "epoch": 0.03, "learning_rate": 2.961723048109563e-07, "logits/chosen": -3.7633213996887207, "logits/rejected": -3.566074848175049, "logps/chosen": -141.76890563964844, "logps/rejected": -142.35098266601562, "loss": 0.4026, "rewards/accuracies": 0.75, "rewards/chosen": 0.10821930319070816, "rewards/margins": 1.1136753559112549, "rewards/rejected": -1.0054560899734497, "step": 240 }, { "epoch": 0.03, "learning_rate": 2.9613718834133206e-07, "logits/chosen": -2.6251418590545654, "logits/rejected": -2.9427804946899414, "logps/chosen": -120.33463287353516, "logps/rejected": -202.0577392578125, "loss": 0.3456, "rewards/accuracies": 0.875, "rewards/chosen": 0.10063973069190979, "rewards/margins": 1.3071494102478027, "rewards/rejected": -1.2065095901489258, "step": 241 }, { "epoch": 0.03, "learning_rate": 2.961020718717078e-07, "logits/chosen": -3.6507034301757812, "logits/rejected": -3.8930370807647705, "logps/chosen": -165.0177001953125, "logps/rejected": -206.48675537109375, "loss": 0.7507, "rewards/accuracies": 0.75, "rewards/chosen": -0.04302286356687546, "rewards/margins": 0.16279834508895874, "rewards/rejected": -0.2058212161064148, "step": 242 }, { "epoch": 0.03, "learning_rate": 2.9606695540208357e-07, "logits/chosen": -2.7475037574768066, "logits/rejected": -3.154788017272949, "logps/chosen": -240.9046630859375, "logps/rejected": -202.08717346191406, "loss": 0.5142, "rewards/accuracies": 0.75, "rewards/chosen": 0.00800538994371891, "rewards/margins": 0.7327769994735718, "rewards/rejected": -0.7247716188430786, "step": 243 }, { "epoch": 0.03, "learning_rate": 2.960318389324593e-07, "logits/chosen": -3.4889817237854004, "logits/rejected": -3.0416648387908936, "logps/chosen": -241.79881286621094, "logps/rejected": -251.40850830078125, "loss": 0.602, "rewards/accuracies": 0.75, "rewards/chosen": 0.06766597926616669, "rewards/margins": 0.6569793224334717, "rewards/rejected": -0.5893133282661438, "step": 244 }, { "epoch": 0.03, "learning_rate": 2.959967224628351e-07, "logits/chosen": -3.3415560722351074, "logits/rejected": -3.5458202362060547, "logps/chosen": -147.89974975585938, "logps/rejected": -322.64581298828125, "loss": 0.4864, "rewards/accuracies": 0.75, "rewards/chosen": -0.1522575318813324, "rewards/margins": 1.0821325778961182, "rewards/rejected": -1.234390139579773, "step": 245 }, { "epoch": 0.03, "learning_rate": 2.9596160599321083e-07, "logits/chosen": -3.1987807750701904, "logits/rejected": -3.247431993484497, "logps/chosen": -250.9003448486328, "logps/rejected": -149.986328125, "loss": 0.5994, "rewards/accuracies": 0.875, "rewards/chosen": -0.14529022574424744, "rewards/margins": 0.3438277840614319, "rewards/rejected": -0.48911798000335693, "step": 246 }, { "epoch": 0.03, "learning_rate": 2.9592648952358653e-07, "logits/chosen": -3.712801933288574, "logits/rejected": -3.555534839630127, "logps/chosen": -321.03778076171875, "logps/rejected": -269.14007568359375, "loss": 0.4639, "rewards/accuracies": 0.875, "rewards/chosen": -0.012854758650064468, "rewards/margins": 0.7215206027030945, "rewards/rejected": -0.7343753576278687, "step": 247 }, { "epoch": 0.03, "learning_rate": 2.958913730539623e-07, "logits/chosen": -3.397170066833496, "logits/rejected": -3.4199612140655518, "logps/chosen": -244.46722412109375, "logps/rejected": -202.02520751953125, "loss": 0.4384, "rewards/accuracies": 0.875, "rewards/chosen": 0.12344011664390564, "rewards/margins": 0.9416974186897278, "rewards/rejected": -0.8182573318481445, "step": 248 }, { "epoch": 0.03, "learning_rate": 2.9585625658433804e-07, "logits/chosen": -2.8129043579101562, "logits/rejected": -2.795145034790039, "logps/chosen": -252.59750366210938, "logps/rejected": -142.37274169921875, "loss": 0.6231, "rewards/accuracies": 0.625, "rewards/chosen": -0.17783230543136597, "rewards/margins": 0.5210087299346924, "rewards/rejected": -0.6988410949707031, "step": 249 }, { "epoch": 0.03, "learning_rate": 2.958211401147138e-07, "logits/chosen": -3.1448240280151367, "logits/rejected": -2.9438672065734863, "logps/chosen": -246.2015380859375, "logps/rejected": -216.67286682128906, "loss": 0.5658, "rewards/accuracies": 0.75, "rewards/chosen": -0.19408829510211945, "rewards/margins": 0.41872453689575195, "rewards/rejected": -0.6128128170967102, "step": 250 }, { "epoch": 0.03, "learning_rate": 2.9578602364508955e-07, "logits/chosen": -3.0558786392211914, "logits/rejected": -3.134983539581299, "logps/chosen": -293.5311584472656, "logps/rejected": -334.5091552734375, "loss": 0.4784, "rewards/accuracies": 0.625, "rewards/chosen": -0.09639740735292435, "rewards/margins": 0.9320396780967712, "rewards/rejected": -1.0284371376037598, "step": 251 }, { "epoch": 0.03, "learning_rate": 2.9575090717546525e-07, "logits/chosen": -3.5703725814819336, "logits/rejected": -3.4591760635375977, "logps/chosen": -122.27105712890625, "logps/rejected": -138.0686798095703, "loss": 0.6985, "rewards/accuracies": 0.625, "rewards/chosen": -0.2735036313533783, "rewards/margins": 0.22693270444869995, "rewards/rejected": -0.5004363656044006, "step": 252 }, { "epoch": 0.03, "learning_rate": 2.95715790705841e-07, "logits/chosen": -2.582799196243286, "logits/rejected": -2.5798473358154297, "logps/chosen": -367.116943359375, "logps/rejected": -163.09207153320312, "loss": 0.5897, "rewards/accuracies": 0.625, "rewards/chosen": 0.043112900108098984, "rewards/margins": 0.2766725420951843, "rewards/rejected": -0.23355963826179504, "step": 253 }, { "epoch": 0.03, "learning_rate": 2.956806742362168e-07, "logits/chosen": -2.7647547721862793, "logits/rejected": -2.7235262393951416, "logps/chosen": -255.77725219726562, "logps/rejected": -248.46710205078125, "loss": 0.6137, "rewards/accuracies": 0.75, "rewards/chosen": -0.09696606546640396, "rewards/margins": 0.21835319697856903, "rewards/rejected": -0.3153192698955536, "step": 254 }, { "epoch": 0.03, "learning_rate": 2.956455577665925e-07, "logits/chosen": -2.8253111839294434, "logits/rejected": -2.7937302589416504, "logps/chosen": -269.11822509765625, "logps/rejected": -147.9305419921875, "loss": 0.735, "rewards/accuracies": 0.5, "rewards/chosen": -0.3757493793964386, "rewards/margins": 0.13268440961837769, "rewards/rejected": -0.5084337592124939, "step": 255 }, { "epoch": 0.03, "learning_rate": 2.9561044129696827e-07, "logits/chosen": -3.5680689811706543, "logits/rejected": -3.8017170429229736, "logps/chosen": -184.30157470703125, "logps/rejected": -181.49844360351562, "loss": 0.5662, "rewards/accuracies": 0.625, "rewards/chosen": -0.34342139959335327, "rewards/margins": 0.41154569387435913, "rewards/rejected": -0.7549671530723572, "step": 256 }, { "epoch": 0.03, "learning_rate": 2.95575324827344e-07, "logits/chosen": -3.413520336151123, "logits/rejected": -3.2705631256103516, "logps/chosen": -207.03585815429688, "logps/rejected": -187.05044555664062, "loss": 0.4434, "rewards/accuracies": 0.75, "rewards/chosen": -0.044650763273239136, "rewards/margins": 0.9544289112091064, "rewards/rejected": -0.999079704284668, "step": 257 }, { "epoch": 0.03, "learning_rate": 2.9554020835771977e-07, "logits/chosen": -3.250988721847534, "logits/rejected": -3.1080923080444336, "logps/chosen": -311.73333740234375, "logps/rejected": -207.7476806640625, "loss": 1.0542, "rewards/accuracies": 0.25, "rewards/chosen": -0.6775745153427124, "rewards/margins": -0.48238229751586914, "rewards/rejected": -0.19519217312335968, "step": 258 }, { "epoch": 0.03, "learning_rate": 2.9550509188809553e-07, "logits/chosen": -3.295384407043457, "logits/rejected": -3.2088217735290527, "logps/chosen": -212.302490234375, "logps/rejected": -306.0133056640625, "loss": 0.4493, "rewards/accuracies": 0.75, "rewards/chosen": -0.10247643291950226, "rewards/margins": 0.7426782250404358, "rewards/rejected": -0.8451547622680664, "step": 259 }, { "epoch": 0.03, "learning_rate": 2.9546997541847123e-07, "logits/chosen": -2.9110910892486572, "logits/rejected": -2.7901899814605713, "logps/chosen": -161.24423217773438, "logps/rejected": -165.93568420410156, "loss": 0.5942, "rewards/accuracies": 0.75, "rewards/chosen": -0.27035072445869446, "rewards/margins": 0.29355430603027344, "rewards/rejected": -0.5639050602912903, "step": 260 }, { "epoch": 0.03, "learning_rate": 2.95434858948847e-07, "logits/chosen": -3.393010377883911, "logits/rejected": -3.228618621826172, "logps/chosen": -278.3468322753906, "logps/rejected": -240.8148651123047, "loss": 0.7107, "rewards/accuracies": 0.5, "rewards/chosen": -0.024721860885620117, "rewards/margins": 0.11118073761463165, "rewards/rejected": -0.13590261340141296, "step": 261 }, { "epoch": 0.03, "learning_rate": 2.9539974247922274e-07, "logits/chosen": -2.8101284503936768, "logits/rejected": -2.7222795486450195, "logps/chosen": -408.3419189453125, "logps/rejected": -332.5548400878906, "loss": 0.5974, "rewards/accuracies": 0.75, "rewards/chosen": -0.15421709418296814, "rewards/margins": 0.5535743236541748, "rewards/rejected": -0.7077913880348206, "step": 262 }, { "epoch": 0.03, "learning_rate": 2.953646260095985e-07, "logits/chosen": -2.37119197845459, "logits/rejected": -2.436436176300049, "logps/chosen": -217.45526123046875, "logps/rejected": -155.53355407714844, "loss": 0.6469, "rewards/accuracies": 0.625, "rewards/chosen": -0.2927638292312622, "rewards/margins": 0.38705724477767944, "rewards/rejected": -0.6798210740089417, "step": 263 }, { "epoch": 0.03, "learning_rate": 2.9532950953997424e-07, "logits/chosen": -3.383761405944824, "logits/rejected": -3.105156183242798, "logps/chosen": -288.82489013671875, "logps/rejected": -234.01803588867188, "loss": 0.5456, "rewards/accuracies": 0.875, "rewards/chosen": 0.03502259403467178, "rewards/margins": 0.36700737476348877, "rewards/rejected": -0.3319847881793976, "step": 264 }, { "epoch": 0.03, "learning_rate": 2.9529439307034994e-07, "logits/chosen": -3.2609729766845703, "logits/rejected": -3.3870296478271484, "logps/chosen": -366.5753479003906, "logps/rejected": -280.2196044921875, "loss": 0.3967, "rewards/accuracies": 0.75, "rewards/chosen": 0.04168878495693207, "rewards/margins": 1.0339088439941406, "rewards/rejected": -0.9922200441360474, "step": 265 }, { "epoch": 0.03, "learning_rate": 2.9525927660072575e-07, "logits/chosen": -3.5743567943573, "logits/rejected": -3.2901411056518555, "logps/chosen": -223.4453125, "logps/rejected": -137.41665649414062, "loss": 0.7586, "rewards/accuracies": 0.625, "rewards/chosen": -0.08028890192508698, "rewards/margins": -0.00903475284576416, "rewards/rejected": -0.07125413417816162, "step": 266 }, { "epoch": 0.03, "learning_rate": 2.952241601311015e-07, "logits/chosen": -2.895266056060791, "logits/rejected": -2.9548327922821045, "logps/chosen": -129.94833374023438, "logps/rejected": -185.19534301757812, "loss": 0.6044, "rewards/accuracies": 0.5, "rewards/chosen": 0.03316054493188858, "rewards/margins": 0.39170870184898376, "rewards/rejected": -0.3585481643676758, "step": 267 }, { "epoch": 0.03, "learning_rate": 2.951890436614772e-07, "logits/chosen": -3.622300148010254, "logits/rejected": -3.056347370147705, "logps/chosen": -250.09535217285156, "logps/rejected": -190.8685760498047, "loss": 0.6703, "rewards/accuracies": 0.375, "rewards/chosen": -0.08594925701618195, "rewards/margins": 0.30523163080215454, "rewards/rejected": -0.3911808431148529, "step": 268 }, { "epoch": 0.03, "learning_rate": 2.9515392719185296e-07, "logits/chosen": -3.10042667388916, "logits/rejected": -3.348539113998413, "logps/chosen": -220.8554229736328, "logps/rejected": -203.28074645996094, "loss": 0.5053, "rewards/accuracies": 0.75, "rewards/chosen": -0.19195130467414856, "rewards/margins": 0.6746183633804321, "rewards/rejected": -0.8665696382522583, "step": 269 }, { "epoch": 0.03, "learning_rate": 2.951188107222287e-07, "logits/chosen": -3.3677988052368164, "logits/rejected": -3.060849189758301, "logps/chosen": -468.64080810546875, "logps/rejected": -307.90435791015625, "loss": 0.6562, "rewards/accuracies": 0.5, "rewards/chosen": -0.163176029920578, "rewards/margins": 0.42041224241256714, "rewards/rejected": -0.5835882425308228, "step": 270 }, { "epoch": 0.03, "learning_rate": 2.9508369425260447e-07, "logits/chosen": -2.292534351348877, "logits/rejected": -2.2191662788391113, "logps/chosen": -198.69781494140625, "logps/rejected": -153.9535369873047, "loss": 0.5588, "rewards/accuracies": 0.625, "rewards/chosen": -0.09385165572166443, "rewards/margins": 0.4356576204299927, "rewards/rejected": -0.5295092463493347, "step": 271 }, { "epoch": 0.03, "learning_rate": 2.950485777829802e-07, "logits/chosen": -3.2396583557128906, "logits/rejected": -2.7925634384155273, "logps/chosen": -393.10662841796875, "logps/rejected": -274.460205078125, "loss": 0.538, "rewards/accuracies": 0.75, "rewards/chosen": -0.06138180196285248, "rewards/margins": 0.39145928621292114, "rewards/rejected": -0.4528411030769348, "step": 272 }, { "epoch": 0.03, "learning_rate": 2.950134613133559e-07, "logits/chosen": -2.5421390533447266, "logits/rejected": -2.56845760345459, "logps/chosen": -414.1586608886719, "logps/rejected": -324.009765625, "loss": 0.6283, "rewards/accuracies": 0.75, "rewards/chosen": 0.2678390443325043, "rewards/margins": 0.5445848703384399, "rewards/rejected": -0.2767457664012909, "step": 273 }, { "epoch": 0.03, "learning_rate": 2.949783448437317e-07, "logits/chosen": -3.293621301651001, "logits/rejected": -3.4014596939086914, "logps/chosen": -178.57855224609375, "logps/rejected": -322.4431457519531, "loss": 0.5331, "rewards/accuracies": 0.75, "rewards/chosen": -0.27491435408592224, "rewards/margins": 0.49620288610458374, "rewards/rejected": -0.7711172103881836, "step": 274 }, { "epoch": 0.03, "learning_rate": 2.9494322837410743e-07, "logits/chosen": -3.5934128761291504, "logits/rejected": -3.473421096801758, "logps/chosen": -316.4269104003906, "logps/rejected": -280.4856872558594, "loss": 0.3596, "rewards/accuracies": 0.75, "rewards/chosen": 0.33110618591308594, "rewards/margins": 1.189288854598999, "rewards/rejected": -0.8581825494766235, "step": 275 }, { "epoch": 0.03, "learning_rate": 2.949081119044832e-07, "logits/chosen": -2.942293643951416, "logits/rejected": -3.1980695724487305, "logps/chosen": -222.44879150390625, "logps/rejected": -168.8373565673828, "loss": 0.6689, "rewards/accuracies": 0.375, "rewards/chosen": -0.11472505331039429, "rewards/margins": 0.28952834010124207, "rewards/rejected": -0.40425336360931396, "step": 276 }, { "epoch": 0.03, "learning_rate": 2.9487299543485894e-07, "logits/chosen": -2.7216310501098633, "logits/rejected": -2.735112428665161, "logps/chosen": -142.8868865966797, "logps/rejected": -118.58480834960938, "loss": 0.6078, "rewards/accuracies": 0.625, "rewards/chosen": -0.04224151372909546, "rewards/margins": 0.29635509848594666, "rewards/rejected": -0.3385966122150421, "step": 277 }, { "epoch": 0.03, "learning_rate": 2.948378789652347e-07, "logits/chosen": -3.320821762084961, "logits/rejected": -3.3334197998046875, "logps/chosen": -331.442138671875, "logps/rejected": -326.5345153808594, "loss": 0.474, "rewards/accuracies": 0.75, "rewards/chosen": -0.3174463212490082, "rewards/margins": 0.6842853426933289, "rewards/rejected": -1.0017316341400146, "step": 278 }, { "epoch": 0.03, "learning_rate": 2.9480276249561045e-07, "logits/chosen": -3.4243664741516113, "logits/rejected": -3.8387043476104736, "logps/chosen": -209.17385864257812, "logps/rejected": -207.16128540039062, "loss": 0.3675, "rewards/accuracies": 0.875, "rewards/chosen": 0.11109502613544464, "rewards/margins": 1.3444573879241943, "rewards/rejected": -1.2333624362945557, "step": 279 }, { "epoch": 0.03, "learning_rate": 2.947676460259862e-07, "logits/chosen": -2.8244261741638184, "logits/rejected": -2.9401025772094727, "logps/chosen": -322.2203674316406, "logps/rejected": -391.6444091796875, "loss": 0.8416, "rewards/accuracies": 0.5, "rewards/chosen": 0.0325528085231781, "rewards/margins": -0.13652373850345612, "rewards/rejected": 0.16907654702663422, "step": 280 }, { "epoch": 0.03, "learning_rate": 2.947325295563619e-07, "logits/chosen": -3.6474616527557373, "logits/rejected": -3.429649591445923, "logps/chosen": -279.5596008300781, "logps/rejected": -244.8258514404297, "loss": 0.4327, "rewards/accuracies": 0.875, "rewards/chosen": -0.45788782835006714, "rewards/margins": 0.9634177684783936, "rewards/rejected": -1.4213056564331055, "step": 281 }, { "epoch": 0.03, "learning_rate": 2.9469741308673765e-07, "logits/chosen": -3.269453763961792, "logits/rejected": -3.0245208740234375, "logps/chosen": -336.4051513671875, "logps/rejected": -291.3902282714844, "loss": 0.5383, "rewards/accuracies": 0.75, "rewards/chosen": -0.1263527274131775, "rewards/margins": 0.4369955062866211, "rewards/rejected": -0.5633482933044434, "step": 282 }, { "epoch": 0.03, "learning_rate": 2.946622966171134e-07, "logits/chosen": -3.0807693004608154, "logits/rejected": -2.7006919384002686, "logps/chosen": -320.9117431640625, "logps/rejected": -237.8446044921875, "loss": 0.7154, "rewards/accuracies": 0.5, "rewards/chosen": -0.49351128935813904, "rewards/margins": 0.21444934606552124, "rewards/rejected": -0.7079607248306274, "step": 283 }, { "epoch": 0.03, "learning_rate": 2.9462718014748916e-07, "logits/chosen": -2.948262929916382, "logits/rejected": -2.9350359439849854, "logps/chosen": -274.36187744140625, "logps/rejected": -205.1109619140625, "loss": 0.6456, "rewards/accuracies": 0.625, "rewards/chosen": -0.08139395713806152, "rewards/margins": 0.1307264119386673, "rewards/rejected": -0.21212036907672882, "step": 284 }, { "epoch": 0.03, "learning_rate": 2.945920636778649e-07, "logits/chosen": -2.964962959289551, "logits/rejected": -3.2351560592651367, "logps/chosen": -338.36151123046875, "logps/rejected": -358.1169738769531, "loss": 0.2838, "rewards/accuracies": 0.75, "rewards/chosen": 0.11050411313772202, "rewards/margins": 2.0304903984069824, "rewards/rejected": -1.9199864864349365, "step": 285 }, { "epoch": 0.03, "learning_rate": 2.945569472082406e-07, "logits/chosen": -2.6364738941192627, "logits/rejected": -2.820420265197754, "logps/chosen": -173.23806762695312, "logps/rejected": -286.0772399902344, "loss": 0.4242, "rewards/accuracies": 0.625, "rewards/chosen": 0.009502798318862915, "rewards/margins": 1.4162652492523193, "rewards/rejected": -1.4067624807357788, "step": 286 }, { "epoch": 0.03, "learning_rate": 2.9452183073861637e-07, "logits/chosen": -3.712928533554077, "logits/rejected": -4.027872085571289, "logps/chosen": -141.1028594970703, "logps/rejected": -178.35043334960938, "loss": 0.386, "rewards/accuracies": 0.875, "rewards/chosen": 0.03404662758111954, "rewards/margins": 1.340185284614563, "rewards/rejected": -1.3061386346817017, "step": 287 }, { "epoch": 0.03, "learning_rate": 2.944867142689922e-07, "logits/chosen": -3.055030107498169, "logits/rejected": -3.342327356338501, "logps/chosen": -349.2940979003906, "logps/rejected": -236.1305389404297, "loss": 0.8511, "rewards/accuracies": 0.625, "rewards/chosen": -0.5700069665908813, "rewards/margins": -0.04341600835323334, "rewards/rejected": -0.5265909433364868, "step": 288 }, { "epoch": 0.03, "learning_rate": 2.944515977993679e-07, "logits/chosen": -2.088204860687256, "logits/rejected": -2.1083574295043945, "logps/chosen": -406.4343566894531, "logps/rejected": -307.288818359375, "loss": 0.7923, "rewards/accuracies": 0.625, "rewards/chosen": -0.28288477659225464, "rewards/margins": 0.10361167788505554, "rewards/rejected": -0.3864964544773102, "step": 289 }, { "epoch": 0.03, "learning_rate": 2.9441648132974363e-07, "logits/chosen": -2.992600917816162, "logits/rejected": -2.660426139831543, "logps/chosen": -137.6371307373047, "logps/rejected": -177.1961669921875, "loss": 0.553, "rewards/accuracies": 0.625, "rewards/chosen": -0.11843032389879227, "rewards/margins": 0.3880849778652191, "rewards/rejected": -0.5065153241157532, "step": 290 }, { "epoch": 0.03, "learning_rate": 2.943813648601194e-07, "logits/chosen": -3.703796863555908, "logits/rejected": -3.789332389831543, "logps/chosen": -237.91632080078125, "logps/rejected": -285.07269287109375, "loss": 0.4759, "rewards/accuracies": 0.625, "rewards/chosen": -0.44285842776298523, "rewards/margins": 1.0705028772354126, "rewards/rejected": -1.5133612155914307, "step": 291 }, { "epoch": 0.03, "learning_rate": 2.9434624839049514e-07, "logits/chosen": -2.7813186645507812, "logits/rejected": -2.623178720474243, "logps/chosen": -104.82647705078125, "logps/rejected": -214.0926971435547, "loss": 0.6244, "rewards/accuracies": 0.625, "rewards/chosen": 0.11224200576543808, "rewards/margins": 0.2823832631111145, "rewards/rejected": -0.17014124989509583, "step": 292 }, { "epoch": 0.03, "learning_rate": 2.943111319208709e-07, "logits/chosen": -3.278107166290283, "logits/rejected": -3.2751479148864746, "logps/chosen": -279.2469177246094, "logps/rejected": -304.2403564453125, "loss": 0.8096, "rewards/accuracies": 0.5, "rewards/chosen": -0.2041158527135849, "rewards/margins": 0.1692911684513092, "rewards/rejected": -0.3734070062637329, "step": 293 }, { "epoch": 0.03, "learning_rate": 2.942760154512466e-07, "logits/chosen": -2.3685712814331055, "logits/rejected": -2.8315694332122803, "logps/chosen": -353.8135986328125, "logps/rejected": -248.48870849609375, "loss": 0.671, "rewards/accuracies": 0.5, "rewards/chosen": -0.025336403399705887, "rewards/margins": 0.2184801697731018, "rewards/rejected": -0.2438165694475174, "step": 294 }, { "epoch": 0.03, "learning_rate": 2.9424089898162235e-07, "logits/chosen": -3.0615806579589844, "logits/rejected": -2.8228201866149902, "logps/chosen": -172.1629638671875, "logps/rejected": -169.157958984375, "loss": 0.5479, "rewards/accuracies": 0.875, "rewards/chosen": 0.15541484951972961, "rewards/margins": 0.8552181124687195, "rewards/rejected": -0.6998032331466675, "step": 295 }, { "epoch": 0.03, "learning_rate": 2.942057825119981e-07, "logits/chosen": -2.9466476440429688, "logits/rejected": -3.334681510925293, "logps/chosen": -284.5035095214844, "logps/rejected": -378.8334045410156, "loss": 0.4928, "rewards/accuracies": 0.75, "rewards/chosen": -0.09600576758384705, "rewards/margins": 0.6931609511375427, "rewards/rejected": -0.7891668081283569, "step": 296 }, { "epoch": 0.03, "learning_rate": 2.9417066604237386e-07, "logits/chosen": -3.3863070011138916, "logits/rejected": -3.148373603820801, "logps/chosen": -362.2552795410156, "logps/rejected": -257.8871154785156, "loss": 0.4848, "rewards/accuracies": 0.75, "rewards/chosen": -0.4009194076061249, "rewards/margins": 0.7932018041610718, "rewards/rejected": -1.1941211223602295, "step": 297 }, { "epoch": 0.03, "learning_rate": 2.941355495727496e-07, "logits/chosen": -2.9037632942199707, "logits/rejected": -2.832989454269409, "logps/chosen": -132.77593994140625, "logps/rejected": -139.24984741210938, "loss": 0.5631, "rewards/accuracies": 0.875, "rewards/chosen": 0.016427859663963318, "rewards/margins": 0.5653521418571472, "rewards/rejected": -0.5489243268966675, "step": 298 }, { "epoch": 0.03, "learning_rate": 2.9410043310312536e-07, "logits/chosen": -3.2077412605285645, "logits/rejected": -3.107977867126465, "logps/chosen": -345.524658203125, "logps/rejected": -291.3341979980469, "loss": 0.3868, "rewards/accuracies": 0.875, "rewards/chosen": -0.014534756541252136, "rewards/margins": 1.1889867782592773, "rewards/rejected": -1.203521490097046, "step": 299 }, { "epoch": 0.03, "learning_rate": 2.940653166335011e-07, "logits/chosen": -3.352492094039917, "logits/rejected": -3.109666347503662, "logps/chosen": -362.8140563964844, "logps/rejected": -252.06201171875, "loss": 0.5785, "rewards/accuracies": 0.75, "rewards/chosen": -0.18650507926940918, "rewards/margins": 0.3064579665660858, "rewards/rejected": -0.4929630160331726, "step": 300 }, { "epoch": 0.03, "learning_rate": 2.9403020016387687e-07, "logits/chosen": -2.6634249687194824, "logits/rejected": -2.7344226837158203, "logps/chosen": -356.1751403808594, "logps/rejected": -317.7835388183594, "loss": 0.4404, "rewards/accuracies": 0.875, "rewards/chosen": -0.09449952095746994, "rewards/margins": 0.855010986328125, "rewards/rejected": -0.9495104551315308, "step": 301 }, { "epoch": 0.03, "learning_rate": 2.9399508369425257e-07, "logits/chosen": -2.7083141803741455, "logits/rejected": -3.0024375915527344, "logps/chosen": -274.20294189453125, "logps/rejected": -202.3379364013672, "loss": 0.6559, "rewards/accuracies": 0.75, "rewards/chosen": 0.01950082927942276, "rewards/margins": 0.9511275887489319, "rewards/rejected": -0.9316267371177673, "step": 302 }, { "epoch": 0.03, "learning_rate": 2.939599672246283e-07, "logits/chosen": -2.9776711463928223, "logits/rejected": -2.837388277053833, "logps/chosen": -258.4771423339844, "logps/rejected": -188.95675659179688, "loss": 0.5883, "rewards/accuracies": 0.75, "rewards/chosen": 0.0939856544137001, "rewards/margins": 0.25986289978027344, "rewards/rejected": -0.16587725281715393, "step": 303 }, { "epoch": 0.04, "learning_rate": 2.939248507550041e-07, "logits/chosen": -2.441681385040283, "logits/rejected": -2.4628002643585205, "logps/chosen": -259.7264404296875, "logps/rejected": -250.77215576171875, "loss": 0.6129, "rewards/accuracies": 0.625, "rewards/chosen": -0.22008131444454193, "rewards/margins": 0.3451223075389862, "rewards/rejected": -0.5652036070823669, "step": 304 }, { "epoch": 0.04, "learning_rate": 2.9388973428537983e-07, "logits/chosen": -3.7031519412994385, "logits/rejected": -3.6959173679351807, "logps/chosen": -177.1024169921875, "logps/rejected": -128.32034301757812, "loss": 1.1963, "rewards/accuracies": 0.5, "rewards/chosen": -1.0959892272949219, "rewards/margins": -0.4990222156047821, "rewards/rejected": -0.5969669818878174, "step": 305 }, { "epoch": 0.04, "learning_rate": 2.938546178157556e-07, "logits/chosen": -3.327065944671631, "logits/rejected": -3.6325559616088867, "logps/chosen": -123.60502624511719, "logps/rejected": -213.4351806640625, "loss": 0.2347, "rewards/accuracies": 1.0, "rewards/chosen": 0.05670592561364174, "rewards/margins": 1.4688411951065063, "rewards/rejected": -1.4121352434158325, "step": 306 }, { "epoch": 0.04, "learning_rate": 2.9381950134613134e-07, "logits/chosen": -2.4592673778533936, "logits/rejected": -2.374279499053955, "logps/chosen": -344.15069580078125, "logps/rejected": -383.67718505859375, "loss": 0.4861, "rewards/accuracies": 0.75, "rewards/chosen": -0.1637369990348816, "rewards/margins": 0.6602519154548645, "rewards/rejected": -0.8239889144897461, "step": 307 }, { "epoch": 0.04, "learning_rate": 2.9378438487650704e-07, "logits/chosen": -2.758707046508789, "logits/rejected": -2.822021245956421, "logps/chosen": -153.11996459960938, "logps/rejected": -148.84185791015625, "loss": 0.6439, "rewards/accuracies": 0.5, "rewards/chosen": -0.009745601564645767, "rewards/margins": 0.306857168674469, "rewards/rejected": -0.31660276651382446, "step": 308 }, { "epoch": 0.04, "learning_rate": 2.937492684068828e-07, "logits/chosen": -2.9479808807373047, "logits/rejected": -2.8663699626922607, "logps/chosen": -219.19906616210938, "logps/rejected": -187.76983642578125, "loss": 0.4953, "rewards/accuracies": 0.75, "rewards/chosen": -0.1511968970298767, "rewards/margins": 0.6809184551239014, "rewards/rejected": -0.8321153521537781, "step": 309 }, { "epoch": 0.04, "learning_rate": 2.9371415193725855e-07, "logits/chosen": -3.3738842010498047, "logits/rejected": -3.1222164630889893, "logps/chosen": -193.66009521484375, "logps/rejected": -175.98611450195312, "loss": 0.6653, "rewards/accuracies": 0.5, "rewards/chosen": -0.3349013924598694, "rewards/margins": 0.16812512278556824, "rewards/rejected": -0.5030264854431152, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.936790354676343e-07, "logits/chosen": -3.5526282787323, "logits/rejected": -3.445509195327759, "logps/chosen": -123.62091064453125, "logps/rejected": -180.59898376464844, "loss": 0.8308, "rewards/accuracies": 0.625, "rewards/chosen": -0.4425463080406189, "rewards/margins": 0.6086779236793518, "rewards/rejected": -1.0512242317199707, "step": 311 }, { "epoch": 0.04, "learning_rate": 2.9364391899801006e-07, "logits/chosen": -3.2245140075683594, "logits/rejected": -3.179724931716919, "logps/chosen": -460.0415344238281, "logps/rejected": -438.45245361328125, "loss": 0.8796, "rewards/accuracies": 0.75, "rewards/chosen": -0.3963492810726166, "rewards/margins": 0.3340432047843933, "rewards/rejected": -0.7303924560546875, "step": 312 }, { "epoch": 0.04, "learning_rate": 2.936088025283858e-07, "logits/chosen": -3.1400339603424072, "logits/rejected": -3.1425466537475586, "logps/chosen": -239.12672424316406, "logps/rejected": -219.86468505859375, "loss": 0.4588, "rewards/accuracies": 0.625, "rewards/chosen": -0.13392484188079834, "rewards/margins": 0.8361534476280212, "rewards/rejected": -0.9700783491134644, "step": 313 }, { "epoch": 0.04, "learning_rate": 2.9357368605876157e-07, "logits/chosen": -2.6414456367492676, "logits/rejected": -2.6109519004821777, "logps/chosen": -253.28993225097656, "logps/rejected": -312.7982482910156, "loss": 0.4628, "rewards/accuracies": 0.625, "rewards/chosen": 0.2580501437187195, "rewards/margins": 0.7300293445587158, "rewards/rejected": -0.4719792306423187, "step": 314 }, { "epoch": 0.04, "learning_rate": 2.935385695891373e-07, "logits/chosen": -3.2817306518554688, "logits/rejected": -3.5374622344970703, "logps/chosen": -157.21253967285156, "logps/rejected": -307.54412841796875, "loss": 0.3245, "rewards/accuracies": 1.0, "rewards/chosen": 0.08705174922943115, "rewards/margins": 1.1963064670562744, "rewards/rejected": -1.1092548370361328, "step": 315 }, { "epoch": 0.04, "learning_rate": 2.93503453119513e-07, "logits/chosen": -3.5193703174591064, "logits/rejected": -3.158482551574707, "logps/chosen": -292.23297119140625, "logps/rejected": -132.8345184326172, "loss": 0.6632, "rewards/accuracies": 0.75, "rewards/chosen": -0.08253279328346252, "rewards/margins": 0.4941173791885376, "rewards/rejected": -0.5766501426696777, "step": 316 }, { "epoch": 0.04, "learning_rate": 2.934683366498888e-07, "logits/chosen": -2.743800640106201, "logits/rejected": -3.1043996810913086, "logps/chosen": -217.6492919921875, "logps/rejected": -310.0658874511719, "loss": 0.4752, "rewards/accuracies": 0.625, "rewards/chosen": -0.18808509409427643, "rewards/margins": 0.8832048773765564, "rewards/rejected": -1.0712898969650269, "step": 317 }, { "epoch": 0.04, "learning_rate": 2.9343322018026453e-07, "logits/chosen": -2.9949402809143066, "logits/rejected": -3.3514065742492676, "logps/chosen": -113.5266342163086, "logps/rejected": -246.15701293945312, "loss": 0.3837, "rewards/accuracies": 0.875, "rewards/chosen": 0.2799297869205475, "rewards/margins": 1.240370512008667, "rewards/rejected": -0.9604406952857971, "step": 318 }, { "epoch": 0.04, "learning_rate": 2.933981037106403e-07, "logits/chosen": -2.973745346069336, "logits/rejected": -3.2470593452453613, "logps/chosen": -384.97125244140625, "logps/rejected": -464.354248046875, "loss": 0.3641, "rewards/accuracies": 0.875, "rewards/chosen": 0.058614302426576614, "rewards/margins": 1.1149569749832153, "rewards/rejected": -1.0563428401947021, "step": 319 }, { "epoch": 0.04, "learning_rate": 2.9336298724101604e-07, "logits/chosen": -3.272603988647461, "logits/rejected": -3.3869433403015137, "logps/chosen": -194.43154907226562, "logps/rejected": -215.8647003173828, "loss": 0.888, "rewards/accuracies": 0.75, "rewards/chosen": -0.3865167498588562, "rewards/margins": -0.10005658864974976, "rewards/rejected": -0.2864602208137512, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.9332787077139174e-07, "logits/chosen": -3.1690454483032227, "logits/rejected": -3.3065826892852783, "logps/chosen": -237.34722900390625, "logps/rejected": -217.1185302734375, "loss": 0.5791, "rewards/accuracies": 0.625, "rewards/chosen": -0.2768930494785309, "rewards/margins": 0.6962430477142334, "rewards/rejected": -0.9731361269950867, "step": 321 }, { "epoch": 0.04, "learning_rate": 2.9329275430176754e-07, "logits/chosen": -2.8331990242004395, "logits/rejected": -3.181321144104004, "logps/chosen": -310.63226318359375, "logps/rejected": -353.94482421875, "loss": 0.3286, "rewards/accuracies": 0.875, "rewards/chosen": 0.16542020440101624, "rewards/margins": 1.615039348602295, "rewards/rejected": -1.449619174003601, "step": 322 }, { "epoch": 0.04, "learning_rate": 2.9325763783214324e-07, "logits/chosen": -3.0022006034851074, "logits/rejected": -3.0283021926879883, "logps/chosen": -318.5064697265625, "logps/rejected": -284.5988464355469, "loss": 0.3804, "rewards/accuracies": 1.0, "rewards/chosen": 0.1324634701013565, "rewards/margins": 1.1459918022155762, "rewards/rejected": -1.0135283470153809, "step": 323 }, { "epoch": 0.04, "learning_rate": 2.93222521362519e-07, "logits/chosen": -2.5371994972229004, "logits/rejected": -2.9142589569091797, "logps/chosen": -197.29006958007812, "logps/rejected": -157.74923706054688, "loss": 0.7642, "rewards/accuracies": 0.625, "rewards/chosen": 0.08228358626365662, "rewards/margins": 0.06503608822822571, "rewards/rejected": 0.017247512936592102, "step": 324 }, { "epoch": 0.04, "learning_rate": 2.9318740489289475e-07, "logits/chosen": -3.585228443145752, "logits/rejected": -3.0260109901428223, "logps/chosen": -302.4771728515625, "logps/rejected": -254.6261749267578, "loss": 0.7624, "rewards/accuracies": 0.625, "rewards/chosen": -0.40179863572120667, "rewards/margins": 0.38462257385253906, "rewards/rejected": -0.7864211797714233, "step": 325 }, { "epoch": 0.04, "learning_rate": 2.931522884232705e-07, "logits/chosen": -3.8819375038146973, "logits/rejected": -3.7814290523529053, "logps/chosen": -371.606689453125, "logps/rejected": -302.6039733886719, "loss": 0.311, "rewards/accuracies": 0.75, "rewards/chosen": 0.41315630078315735, "rewards/margins": 2.109727382659912, "rewards/rejected": -1.6965712308883667, "step": 326 }, { "epoch": 0.04, "learning_rate": 2.9311717195364626e-07, "logits/chosen": -2.563169002532959, "logits/rejected": -2.597715377807617, "logps/chosen": -199.35897827148438, "logps/rejected": -175.29971313476562, "loss": 0.6096, "rewards/accuracies": 0.75, "rewards/chosen": -0.01657724380493164, "rewards/margins": 0.35831472277641296, "rewards/rejected": -0.374891996383667, "step": 327 }, { "epoch": 0.04, "learning_rate": 2.93082055484022e-07, "logits/chosen": -2.8802742958068848, "logits/rejected": -3.2123565673828125, "logps/chosen": -223.31463623046875, "logps/rejected": -155.9239501953125, "loss": 0.5391, "rewards/accuracies": 0.875, "rewards/chosen": 0.2656763195991516, "rewards/margins": 0.6253472566604614, "rewards/rejected": -0.3596709370613098, "step": 328 }, { "epoch": 0.04, "learning_rate": 2.930469390143977e-07, "logits/chosen": -3.3330438137054443, "logits/rejected": -3.0752158164978027, "logps/chosen": -232.72360229492188, "logps/rejected": -206.1331024169922, "loss": 1.0731, "rewards/accuracies": 0.25, "rewards/chosen": -0.4577289819717407, "rewards/margins": -0.13812890648841858, "rewards/rejected": -0.31960004568099976, "step": 329 }, { "epoch": 0.04, "learning_rate": 2.9301182254477347e-07, "logits/chosen": -3.23341703414917, "logits/rejected": -3.2092065811157227, "logps/chosen": -385.8639831542969, "logps/rejected": -205.04293823242188, "loss": 0.6108, "rewards/accuracies": 0.625, "rewards/chosen": -0.08751187473535538, "rewards/margins": 0.5633958578109741, "rewards/rejected": -0.6509077548980713, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.929767060751492e-07, "logits/chosen": -3.0862948894500732, "logits/rejected": -3.3389623165130615, "logps/chosen": -256.924560546875, "logps/rejected": -216.75375366210938, "loss": 0.5746, "rewards/accuracies": 0.625, "rewards/chosen": -0.1574631929397583, "rewards/margins": 0.3690336346626282, "rewards/rejected": -0.5264968872070312, "step": 331 }, { "epoch": 0.04, "learning_rate": 2.92941589605525e-07, "logits/chosen": -3.5357768535614014, "logits/rejected": -3.564411163330078, "logps/chosen": -222.9327392578125, "logps/rejected": -239.39614868164062, "loss": 0.5261, "rewards/accuracies": 0.75, "rewards/chosen": 0.033870432525873184, "rewards/margins": 0.4337180554866791, "rewards/rejected": -0.3998476266860962, "step": 332 }, { "epoch": 0.04, "learning_rate": 2.9290647313590073e-07, "logits/chosen": -2.92166805267334, "logits/rejected": -2.8673064708709717, "logps/chosen": -154.33761596679688, "logps/rejected": -151.41603088378906, "loss": 0.6343, "rewards/accuracies": 0.375, "rewards/chosen": -0.17431049048900604, "rewards/margins": 0.2438768744468689, "rewards/rejected": -0.41818737983703613, "step": 333 }, { "epoch": 0.04, "learning_rate": 2.928713566662765e-07, "logits/chosen": -2.942204236984253, "logits/rejected": -2.9921658039093018, "logps/chosen": -134.6187286376953, "logps/rejected": -168.8784942626953, "loss": 0.539, "rewards/accuracies": 0.875, "rewards/chosen": 0.11122285574674606, "rewards/margins": 0.698149561882019, "rewards/rejected": -0.5869267582893372, "step": 334 }, { "epoch": 0.04, "learning_rate": 2.9283624019665224e-07, "logits/chosen": -3.1781651973724365, "logits/rejected": -3.4687681198120117, "logps/chosen": -269.4075012207031, "logps/rejected": -220.25637817382812, "loss": 0.5999, "rewards/accuracies": 0.625, "rewards/chosen": -0.27838900685310364, "rewards/margins": 0.41326144337654114, "rewards/rejected": -0.6916504502296448, "step": 335 }, { "epoch": 0.04, "learning_rate": 2.92801123727028e-07, "logits/chosen": -3.096864700317383, "logits/rejected": -3.032203197479248, "logps/chosen": -232.36148071289062, "logps/rejected": -293.11767578125, "loss": 0.3994, "rewards/accuracies": 0.875, "rewards/chosen": 0.10393879562616348, "rewards/margins": 1.0696824789047241, "rewards/rejected": -0.9657436609268188, "step": 336 }, { "epoch": 0.04, "learning_rate": 2.927660072574037e-07, "logits/chosen": -2.461975574493408, "logits/rejected": -2.61027193069458, "logps/chosen": -292.6427307128906, "logps/rejected": -256.63531494140625, "loss": 0.4423, "rewards/accuracies": 0.875, "rewards/chosen": 0.2760024070739746, "rewards/margins": 0.7060391902923584, "rewards/rejected": -0.4300367832183838, "step": 337 }, { "epoch": 0.04, "learning_rate": 2.9273089078777945e-07, "logits/chosen": -2.866392135620117, "logits/rejected": -2.8920254707336426, "logps/chosen": -339.0001220703125, "logps/rejected": -342.10589599609375, "loss": 0.5154, "rewards/accuracies": 0.5, "rewards/chosen": 0.14709237217903137, "rewards/margins": 0.6183605194091797, "rewards/rejected": -0.4712681174278259, "step": 338 }, { "epoch": 0.04, "learning_rate": 2.926957743181552e-07, "logits/chosen": -3.3417139053344727, "logits/rejected": -3.105760097503662, "logps/chosen": -403.8670654296875, "logps/rejected": -291.6964111328125, "loss": 0.3191, "rewards/accuracies": 0.875, "rewards/chosen": 0.17001502215862274, "rewards/margins": 1.4750672578811646, "rewards/rejected": -1.3050522804260254, "step": 339 }, { "epoch": 0.04, "learning_rate": 2.9266065784853095e-07, "logits/chosen": -3.247652769088745, "logits/rejected": -3.1947481632232666, "logps/chosen": -252.56202697753906, "logps/rejected": -177.67776489257812, "loss": 0.7384, "rewards/accuracies": 0.625, "rewards/chosen": -0.3070858418941498, "rewards/margins": -0.05510035157203674, "rewards/rejected": -0.25198546051979065, "step": 340 }, { "epoch": 0.04, "learning_rate": 2.926255413789067e-07, "logits/chosen": -3.2172679901123047, "logits/rejected": -3.0673675537109375, "logps/chosen": -333.4479675292969, "logps/rejected": -195.57716369628906, "loss": 0.7629, "rewards/accuracies": 0.75, "rewards/chosen": -0.3991571366786957, "rewards/margins": 0.2618064284324646, "rewards/rejected": -0.6609635353088379, "step": 341 }, { "epoch": 0.04, "learning_rate": 2.925904249092824e-07, "logits/chosen": -2.3871593475341797, "logits/rejected": -2.2753219604492188, "logps/chosen": -235.65164184570312, "logps/rejected": -251.76060485839844, "loss": 0.5954, "rewards/accuracies": 0.5, "rewards/chosen": 0.07159613817930222, "rewards/margins": 0.6409983038902283, "rewards/rejected": -0.5694020986557007, "step": 342 }, { "epoch": 0.04, "learning_rate": 2.9255530843965816e-07, "logits/chosen": -2.857883930206299, "logits/rejected": -2.715831756591797, "logps/chosen": -177.47262573242188, "logps/rejected": -256.6138916015625, "loss": 0.5161, "rewards/accuracies": 0.75, "rewards/chosen": -0.012427326291799545, "rewards/margins": 0.903405487537384, "rewards/rejected": -0.9158328175544739, "step": 343 }, { "epoch": 0.04, "learning_rate": 2.9252019197003397e-07, "logits/chosen": -3.578307628631592, "logits/rejected": -3.40179443359375, "logps/chosen": -352.6904296875, "logps/rejected": -267.75433349609375, "loss": 0.3813, "rewards/accuracies": 0.875, "rewards/chosen": 0.032896608114242554, "rewards/margins": 1.2026045322418213, "rewards/rejected": -1.1697078943252563, "step": 344 }, { "epoch": 0.04, "learning_rate": 2.9248507550040967e-07, "logits/chosen": -3.159625291824341, "logits/rejected": -2.9712705612182617, "logps/chosen": -348.6385803222656, "logps/rejected": -334.8789367675781, "loss": 0.6667, "rewards/accuracies": 0.625, "rewards/chosen": -0.41008687019348145, "rewards/margins": 0.26367512345314026, "rewards/rejected": -0.6737619638442993, "step": 345 }, { "epoch": 0.04, "learning_rate": 2.924499590307854e-07, "logits/chosen": -3.3857009410858154, "logits/rejected": -3.3708343505859375, "logps/chosen": -460.67333984375, "logps/rejected": -311.1851806640625, "loss": 0.5569, "rewards/accuracies": 0.625, "rewards/chosen": 0.07098651677370071, "rewards/margins": 0.44480839371681213, "rewards/rejected": -0.3738219141960144, "step": 346 }, { "epoch": 0.04, "learning_rate": 2.924148425611612e-07, "logits/chosen": -3.080620527267456, "logits/rejected": -2.8345484733581543, "logps/chosen": -203.33914184570312, "logps/rejected": -228.475830078125, "loss": 0.6748, "rewards/accuracies": 0.75, "rewards/chosen": -0.16161608695983887, "rewards/margins": 0.23351091146469116, "rewards/rejected": -0.39512699842453003, "step": 347 }, { "epoch": 0.04, "learning_rate": 2.9237972609153693e-07, "logits/chosen": -2.760406017303467, "logits/rejected": -2.6120376586914062, "logps/chosen": -194.50914001464844, "logps/rejected": -211.8148956298828, "loss": 0.5653, "rewards/accuracies": 0.625, "rewards/chosen": -0.15548710525035858, "rewards/margins": 0.4855707287788391, "rewards/rejected": -0.6410577893257141, "step": 348 }, { "epoch": 0.04, "learning_rate": 2.923446096219127e-07, "logits/chosen": -3.844106912612915, "logits/rejected": -3.606504440307617, "logps/chosen": -288.6073913574219, "logps/rejected": -164.28311157226562, "loss": 0.5344, "rewards/accuracies": 0.75, "rewards/chosen": 0.09621953964233398, "rewards/margins": 0.638676106929779, "rewards/rejected": -0.5424565672874451, "step": 349 }, { "epoch": 0.04, "learning_rate": 2.923094931522884e-07, "logits/chosen": -3.577012062072754, "logits/rejected": -3.207430601119995, "logps/chosen": -405.03460693359375, "logps/rejected": -258.4103698730469, "loss": 0.7757, "rewards/accuracies": 0.625, "rewards/chosen": -0.5443931221961975, "rewards/margins": 0.13326036930084229, "rewards/rejected": -0.677653431892395, "step": 350 }, { "epoch": 0.04, "learning_rate": 2.9227437668266414e-07, "logits/chosen": -3.124886989593506, "logits/rejected": -3.310025215148926, "logps/chosen": -461.0048828125, "logps/rejected": -202.38067626953125, "loss": 0.5395, "rewards/accuracies": 0.625, "rewards/chosen": 0.037668608129024506, "rewards/margins": 0.6605635285377502, "rewards/rejected": -0.6228950023651123, "step": 351 }, { "epoch": 0.04, "learning_rate": 2.922392602130399e-07, "logits/chosen": -3.377054452896118, "logits/rejected": -3.477799892425537, "logps/chosen": -201.81796264648438, "logps/rejected": -220.58404541015625, "loss": 0.4115, "rewards/accuracies": 0.75, "rewards/chosen": 0.025179080665111542, "rewards/margins": 1.3641031980514526, "rewards/rejected": -1.3389240503311157, "step": 352 }, { "epoch": 0.04, "learning_rate": 2.9220414374341565e-07, "logits/chosen": -3.078449010848999, "logits/rejected": -2.9485878944396973, "logps/chosen": -204.41680908203125, "logps/rejected": -214.7860107421875, "loss": 0.5579, "rewards/accuracies": 0.75, "rewards/chosen": -0.08494075387716293, "rewards/margins": 0.5809195041656494, "rewards/rejected": -0.6658602952957153, "step": 353 }, { "epoch": 0.04, "learning_rate": 2.921690272737914e-07, "logits/chosen": -3.243419647216797, "logits/rejected": -3.274054527282715, "logps/chosen": -169.3918914794922, "logps/rejected": -187.13702392578125, "loss": 0.4455, "rewards/accuracies": 0.875, "rewards/chosen": -0.22083498537540436, "rewards/margins": 0.7169981598854065, "rewards/rejected": -0.9378331899642944, "step": 354 }, { "epoch": 0.04, "learning_rate": 2.921339108041671e-07, "logits/chosen": -3.3823444843292236, "logits/rejected": -3.5137081146240234, "logps/chosen": -221.2010040283203, "logps/rejected": -229.50128173828125, "loss": 0.7663, "rewards/accuracies": 0.75, "rewards/chosen": -0.24071770906448364, "rewards/margins": 0.40651610493659973, "rewards/rejected": -0.647233784198761, "step": 355 }, { "epoch": 0.04, "learning_rate": 2.920987943345429e-07, "logits/chosen": -3.289555311203003, "logits/rejected": -3.1732425689697266, "logps/chosen": -132.65480041503906, "logps/rejected": -264.6292724609375, "loss": 0.3568, "rewards/accuracies": 0.875, "rewards/chosen": 0.07612219452857971, "rewards/margins": 1.1061369180679321, "rewards/rejected": -1.0300146341323853, "step": 356 }, { "epoch": 0.04, "learning_rate": 2.9206367786491866e-07, "logits/chosen": -3.3427963256835938, "logits/rejected": -3.0530571937561035, "logps/chosen": -316.4560852050781, "logps/rejected": -277.9018249511719, "loss": 0.6342, "rewards/accuracies": 0.5, "rewards/chosen": -0.07268981635570526, "rewards/margins": 0.29800957441329956, "rewards/rejected": -0.37069937586784363, "step": 357 }, { "epoch": 0.04, "learning_rate": 2.9202856139529436e-07, "logits/chosen": -3.4742090702056885, "logits/rejected": -3.632537364959717, "logps/chosen": -139.5057373046875, "logps/rejected": -161.68316650390625, "loss": 0.5034, "rewards/accuracies": 0.75, "rewards/chosen": -0.22021833062171936, "rewards/margins": 0.7397069931030273, "rewards/rejected": -0.9599252939224243, "step": 358 }, { "epoch": 0.04, "learning_rate": 2.919934449256701e-07, "logits/chosen": -2.5446934700012207, "logits/rejected": -2.332984685897827, "logps/chosen": -389.1212463378906, "logps/rejected": -364.28302001953125, "loss": 0.302, "rewards/accuracies": 0.875, "rewards/chosen": 0.3573048710823059, "rewards/margins": 1.635549783706665, "rewards/rejected": -1.278244972229004, "step": 359 }, { "epoch": 0.04, "learning_rate": 2.9195832845604587e-07, "logits/chosen": -3.7903034687042236, "logits/rejected": -3.8577733039855957, "logps/chosen": -373.8998718261719, "logps/rejected": -301.3363037109375, "loss": 0.5067, "rewards/accuracies": 0.75, "rewards/chosen": -0.1959686279296875, "rewards/margins": 0.7763450741767883, "rewards/rejected": -0.9723137617111206, "step": 360 }, { "epoch": 0.04, "learning_rate": 2.919232119864216e-07, "logits/chosen": -2.3711633682250977, "logits/rejected": -2.368687868118286, "logps/chosen": -531.5543212890625, "logps/rejected": -358.6279602050781, "loss": 0.5601, "rewards/accuracies": 0.75, "rewards/chosen": -0.21405896544456482, "rewards/margins": 0.38602346181869507, "rewards/rejected": -0.6000823974609375, "step": 361 }, { "epoch": 0.04, "learning_rate": 2.918880955167974e-07, "logits/chosen": -3.5251173973083496, "logits/rejected": -3.3067636489868164, "logps/chosen": -172.3519287109375, "logps/rejected": -227.97776794433594, "loss": 0.7153, "rewards/accuracies": 0.625, "rewards/chosen": -0.42045795917510986, "rewards/margins": 0.45866432785987854, "rewards/rejected": -0.879122257232666, "step": 362 }, { "epoch": 0.04, "learning_rate": 2.918529790471731e-07, "logits/chosen": -2.5726752281188965, "logits/rejected": -2.690509557723999, "logps/chosen": -284.4325866699219, "logps/rejected": -220.40345764160156, "loss": 0.5036, "rewards/accuracies": 0.75, "rewards/chosen": 0.11083866655826569, "rewards/margins": 0.5766516923904419, "rewards/rejected": -0.4658130705356598, "step": 363 }, { "epoch": 0.04, "learning_rate": 2.9181786257754883e-07, "logits/chosen": -2.8000705242156982, "logits/rejected": -3.038846254348755, "logps/chosen": -389.9476623535156, "logps/rejected": -282.806640625, "loss": 0.412, "rewards/accuracies": 0.875, "rewards/chosen": 0.004527613520622253, "rewards/margins": 0.9298328161239624, "rewards/rejected": -0.925305187702179, "step": 364 }, { "epoch": 0.04, "learning_rate": 2.9178274610792464e-07, "logits/chosen": -2.6487743854522705, "logits/rejected": -2.543379306793213, "logps/chosen": -179.88192749023438, "logps/rejected": -157.98568725585938, "loss": 0.7515, "rewards/accuracies": 0.5, "rewards/chosen": -0.06328187882900238, "rewards/margins": -0.03620663285255432, "rewards/rejected": -0.027075227349996567, "step": 365 }, { "epoch": 0.04, "learning_rate": 2.9174762963830034e-07, "logits/chosen": -2.91072416305542, "logits/rejected": -2.879549980163574, "logps/chosen": -298.82421875, "logps/rejected": -288.4239501953125, "loss": 0.5432, "rewards/accuracies": 0.75, "rewards/chosen": -0.29703912138938904, "rewards/margins": 0.5171013474464417, "rewards/rejected": -0.8141404390335083, "step": 366 }, { "epoch": 0.04, "learning_rate": 2.917125131686761e-07, "logits/chosen": -2.7178382873535156, "logits/rejected": -2.916861057281494, "logps/chosen": -306.9468688964844, "logps/rejected": -274.46038818359375, "loss": 0.5149, "rewards/accuracies": 0.875, "rewards/chosen": 0.25449931621551514, "rewards/margins": 0.41928577423095703, "rewards/rejected": -0.1647864282131195, "step": 367 }, { "epoch": 0.04, "learning_rate": 2.9167739669905185e-07, "logits/chosen": -2.8936867713928223, "logits/rejected": -2.91463565826416, "logps/chosen": -209.67076110839844, "logps/rejected": -203.77996826171875, "loss": 0.5007, "rewards/accuracies": 0.75, "rewards/chosen": -0.320660263299942, "rewards/margins": 0.5525835156440735, "rewards/rejected": -0.8732438087463379, "step": 368 }, { "epoch": 0.04, "learning_rate": 2.916422802294276e-07, "logits/chosen": -3.2346396446228027, "logits/rejected": -3.1034178733825684, "logps/chosen": -275.64398193359375, "logps/rejected": -348.3328552246094, "loss": 0.4424, "rewards/accuracies": 0.75, "rewards/chosen": 0.2597840428352356, "rewards/margins": 0.8506712913513184, "rewards/rejected": -0.590887188911438, "step": 369 }, { "epoch": 0.04, "learning_rate": 2.9160716375980336e-07, "logits/chosen": -3.795283317565918, "logits/rejected": -3.494300365447998, "logps/chosen": -302.0039978027344, "logps/rejected": -218.86351013183594, "loss": 0.7233, "rewards/accuracies": 0.375, "rewards/chosen": -0.3841859698295593, "rewards/margins": 0.42547863721847534, "rewards/rejected": -0.8096646070480347, "step": 370 }, { "epoch": 0.04, "learning_rate": 2.9157204729017906e-07, "logits/chosen": -2.6999711990356445, "logits/rejected": -3.0116143226623535, "logps/chosen": -181.87298583984375, "logps/rejected": -226.5128631591797, "loss": 0.3867, "rewards/accuracies": 0.75, "rewards/chosen": -0.10978382080793381, "rewards/margins": 0.9570813179016113, "rewards/rejected": -1.066865086555481, "step": 371 }, { "epoch": 0.04, "learning_rate": 2.915369308205548e-07, "logits/chosen": -2.8200936317443848, "logits/rejected": -2.8534929752349854, "logps/chosen": -332.4095458984375, "logps/rejected": -368.05645751953125, "loss": 0.5555, "rewards/accuracies": 0.625, "rewards/chosen": -0.07119980454444885, "rewards/margins": 0.6547142267227173, "rewards/rejected": -0.7259140014648438, "step": 372 }, { "epoch": 0.04, "learning_rate": 2.9150181435093057e-07, "logits/chosen": -2.7920491695404053, "logits/rejected": -2.6613986492156982, "logps/chosen": -183.70977783203125, "logps/rejected": -357.7785949707031, "loss": 0.4618, "rewards/accuracies": 0.625, "rewards/chosen": 0.13575685024261475, "rewards/margins": 0.8295263051986694, "rewards/rejected": -0.6937694549560547, "step": 373 }, { "epoch": 0.04, "learning_rate": 2.914666978813063e-07, "logits/chosen": -2.8138973712921143, "logits/rejected": -2.895423412322998, "logps/chosen": -274.2352294921875, "logps/rejected": -232.97042846679688, "loss": 0.5258, "rewards/accuracies": 0.75, "rewards/chosen": 0.15725061297416687, "rewards/margins": 0.6544475555419922, "rewards/rejected": -0.4971969723701477, "step": 374 }, { "epoch": 0.04, "learning_rate": 2.914315814116821e-07, "logits/chosen": -3.503725290298462, "logits/rejected": -3.6358609199523926, "logps/chosen": -236.18182373046875, "logps/rejected": -246.78253173828125, "loss": 0.4831, "rewards/accuracies": 0.75, "rewards/chosen": -0.05912458151578903, "rewards/margins": 0.8933229446411133, "rewards/rejected": -0.9524475336074829, "step": 375 }, { "epoch": 0.04, "learning_rate": 2.913964649420578e-07, "logits/chosen": -3.4336366653442383, "logits/rejected": -3.3956403732299805, "logps/chosen": -417.1386413574219, "logps/rejected": -328.87054443359375, "loss": 0.385, "rewards/accuracies": 0.875, "rewards/chosen": 0.12791182100772858, "rewards/margins": 0.9718395471572876, "rewards/rejected": -0.8439276814460754, "step": 376 }, { "epoch": 0.04, "learning_rate": 2.9136134847243353e-07, "logits/chosen": -3.1968047618865967, "logits/rejected": -3.259679079055786, "logps/chosen": -182.16749572753906, "logps/rejected": -164.5848388671875, "loss": 0.7117, "rewards/accuracies": 0.625, "rewards/chosen": -0.05847674608230591, "rewards/margins": 0.059625424444675446, "rewards/rejected": -0.11810217797756195, "step": 377 }, { "epoch": 0.04, "learning_rate": 2.9132623200280934e-07, "logits/chosen": -2.565866231918335, "logits/rejected": -2.755876302719116, "logps/chosen": -429.2481994628906, "logps/rejected": -308.80364990234375, "loss": 0.3832, "rewards/accuracies": 0.75, "rewards/chosen": 0.3412594795227051, "rewards/margins": 1.153958797454834, "rewards/rejected": -0.8126993179321289, "step": 378 }, { "epoch": 0.04, "learning_rate": 2.9129111553318504e-07, "logits/chosen": -2.2944064140319824, "logits/rejected": -2.381772756576538, "logps/chosen": -295.29412841796875, "logps/rejected": -339.4980163574219, "loss": 0.8135, "rewards/accuracies": 0.5, "rewards/chosen": -0.3978363573551178, "rewards/margins": -0.06217159330844879, "rewards/rejected": -0.3356647491455078, "step": 379 }, { "epoch": 0.04, "learning_rate": 2.912559990635608e-07, "logits/chosen": -2.888493537902832, "logits/rejected": -2.8989343643188477, "logps/chosen": -201.9176788330078, "logps/rejected": -308.35211181640625, "loss": 0.5701, "rewards/accuracies": 0.75, "rewards/chosen": 0.16608989238739014, "rewards/margins": 0.47484633326530457, "rewards/rejected": -0.30875641107559204, "step": 380 }, { "epoch": 0.04, "learning_rate": 2.9122088259393654e-07, "logits/chosen": -3.1463208198547363, "logits/rejected": -3.0799243450164795, "logps/chosen": -157.54348754882812, "logps/rejected": -202.6800537109375, "loss": 0.6652, "rewards/accuracies": 0.625, "rewards/chosen": -0.4167552590370178, "rewards/margins": 0.3437081575393677, "rewards/rejected": -0.7604634761810303, "step": 381 }, { "epoch": 0.04, "learning_rate": 2.911857661243123e-07, "logits/chosen": -3.436253547668457, "logits/rejected": -3.355146884918213, "logps/chosen": -207.0049285888672, "logps/rejected": -244.63433837890625, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": -0.18851184844970703, "rewards/margins": 1.5959644317626953, "rewards/rejected": -1.7844762802124023, "step": 382 }, { "epoch": 0.04, "learning_rate": 2.9115064965468805e-07, "logits/chosen": -3.5673012733459473, "logits/rejected": -3.224808692932129, "logps/chosen": -281.155517578125, "logps/rejected": -214.9539794921875, "loss": 0.581, "rewards/accuracies": 0.75, "rewards/chosen": -0.10351411998271942, "rewards/margins": 0.37552356719970703, "rewards/rejected": -0.47903767228126526, "step": 383 }, { "epoch": 0.04, "learning_rate": 2.9111553318506375e-07, "logits/chosen": -2.507352352142334, "logits/rejected": -2.582430839538574, "logps/chosen": -398.6578674316406, "logps/rejected": -409.44476318359375, "loss": 0.4568, "rewards/accuracies": 0.875, "rewards/chosen": 0.1252782791852951, "rewards/margins": 1.063739538192749, "rewards/rejected": -0.9384613037109375, "step": 384 }, { "epoch": 0.04, "learning_rate": 2.910804167154395e-07, "logits/chosen": -2.8345272541046143, "logits/rejected": -3.0449700355529785, "logps/chosen": -283.5218505859375, "logps/rejected": -263.2257995605469, "loss": 0.6351, "rewards/accuracies": 0.5, "rewards/chosen": 0.06995394825935364, "rewards/margins": 0.15204322338104248, "rewards/rejected": -0.08208927512168884, "step": 385 }, { "epoch": 0.04, "learning_rate": 2.9104530024581526e-07, "logits/chosen": -2.6289825439453125, "logits/rejected": -2.3539299964904785, "logps/chosen": -276.59149169921875, "logps/rejected": -134.65951538085938, "loss": 0.6826, "rewards/accuracies": 0.5, "rewards/chosen": 0.18538042902946472, "rewards/margins": 0.29335954785346985, "rewards/rejected": -0.10797910392284393, "step": 386 }, { "epoch": 0.04, "learning_rate": 2.91010183776191e-07, "logits/chosen": -2.7838029861450195, "logits/rejected": -2.8201916217803955, "logps/chosen": -176.77011108398438, "logps/rejected": -294.69232177734375, "loss": 0.3345, "rewards/accuracies": 1.0, "rewards/chosen": 0.13179786503314972, "rewards/margins": 1.55593740940094, "rewards/rejected": -1.4241396188735962, "step": 387 }, { "epoch": 0.04, "learning_rate": 2.9097506730656677e-07, "logits/chosen": -3.1612627506256104, "logits/rejected": -3.084688663482666, "logps/chosen": -357.0299377441406, "logps/rejected": -191.01991271972656, "loss": 0.5834, "rewards/accuracies": 0.75, "rewards/chosen": -0.10176057368516922, "rewards/margins": 0.2801131308078766, "rewards/rejected": -0.3818736970424652, "step": 388 }, { "epoch": 0.04, "learning_rate": 2.909399508369425e-07, "logits/chosen": -3.6793336868286133, "logits/rejected": -3.6861114501953125, "logps/chosen": -211.15023803710938, "logps/rejected": -233.69847106933594, "loss": 0.6845, "rewards/accuracies": 0.5, "rewards/chosen": -0.148991197347641, "rewards/margins": 0.3549796938896179, "rewards/rejected": -0.5039708614349365, "step": 389 }, { "epoch": 0.04, "learning_rate": 2.909048343673183e-07, "logits/chosen": -3.301929235458374, "logits/rejected": -3.20796275138855, "logps/chosen": -192.09274291992188, "logps/rejected": -276.6363830566406, "loss": 0.6737, "rewards/accuracies": 0.75, "rewards/chosen": -0.17153462767601013, "rewards/margins": 0.20755133032798767, "rewards/rejected": -0.3790859878063202, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.9086971789769403e-07, "logits/chosen": -2.750441551208496, "logits/rejected": -2.878647804260254, "logps/chosen": -289.1087646484375, "logps/rejected": -254.92759704589844, "loss": 0.4784, "rewards/accuracies": 0.75, "rewards/chosen": -0.03340554237365723, "rewards/margins": 1.09812331199646, "rewards/rejected": -1.1315288543701172, "step": 391 }, { "epoch": 0.05, "learning_rate": 2.9083460142806973e-07, "logits/chosen": -3.4067602157592773, "logits/rejected": -3.2986998558044434, "logps/chosen": -318.7747802734375, "logps/rejected": -295.73565673828125, "loss": 0.5936, "rewards/accuracies": 0.75, "rewards/chosen": -0.47447454929351807, "rewards/margins": 0.43653565645217896, "rewards/rejected": -0.911010205745697, "step": 392 }, { "epoch": 0.05, "learning_rate": 2.907994849584455e-07, "logits/chosen": -3.5612130165100098, "logits/rejected": -3.571927070617676, "logps/chosen": -220.2811279296875, "logps/rejected": -253.1396484375, "loss": 0.4306, "rewards/accuracies": 0.75, "rewards/chosen": 0.21127064526081085, "rewards/margins": 1.0022602081298828, "rewards/rejected": -0.7909895181655884, "step": 393 }, { "epoch": 0.05, "learning_rate": 2.9076436848882124e-07, "logits/chosen": -2.8417909145355225, "logits/rejected": -2.7929465770721436, "logps/chosen": -73.14635467529297, "logps/rejected": -93.77976989746094, "loss": 0.5909, "rewards/accuracies": 0.625, "rewards/chosen": 0.2049848735332489, "rewards/margins": 0.4261936545372009, "rewards/rejected": -0.22120878100395203, "step": 394 }, { "epoch": 0.05, "learning_rate": 2.90729252019197e-07, "logits/chosen": -3.475584030151367, "logits/rejected": -3.029663562774658, "logps/chosen": -192.2339324951172, "logps/rejected": -219.62197875976562, "loss": 0.6004, "rewards/accuracies": 0.625, "rewards/chosen": -0.26847776770591736, "rewards/margins": 0.8090030550956726, "rewards/rejected": -1.0774807929992676, "step": 395 }, { "epoch": 0.05, "learning_rate": 2.9069413554957275e-07, "logits/chosen": -3.025455951690674, "logits/rejected": -2.979712963104248, "logps/chosen": -227.0043487548828, "logps/rejected": -263.66632080078125, "loss": 0.5774, "rewards/accuracies": 0.625, "rewards/chosen": -0.20933493971824646, "rewards/margins": 0.36125776171684265, "rewards/rejected": -0.5705927014350891, "step": 396 }, { "epoch": 0.05, "learning_rate": 2.906590190799485e-07, "logits/chosen": -3.744821071624756, "logits/rejected": -4.103786468505859, "logps/chosen": -135.1412811279297, "logps/rejected": -191.207763671875, "loss": 0.462, "rewards/accuracies": 0.625, "rewards/chosen": -0.15522538125514984, "rewards/margins": 0.7042402029037476, "rewards/rejected": -0.8594655990600586, "step": 397 }, { "epoch": 0.05, "learning_rate": 2.906239026103242e-07, "logits/chosen": -3.234471082687378, "logits/rejected": -3.2841789722442627, "logps/chosen": -265.807861328125, "logps/rejected": -220.61085510253906, "loss": 0.6239, "rewards/accuracies": 0.375, "rewards/chosen": 0.11368418484926224, "rewards/margins": 0.6250712871551514, "rewards/rejected": -0.5113871097564697, "step": 398 }, { "epoch": 0.05, "learning_rate": 2.905887861407e-07, "logits/chosen": -2.9094643592834473, "logits/rejected": -3.1217610836029053, "logps/chosen": -253.38064575195312, "logps/rejected": -321.3524169921875, "loss": 0.6663, "rewards/accuracies": 0.75, "rewards/chosen": -0.1420055478811264, "rewards/margins": 0.3870762884616852, "rewards/rejected": -0.5290818214416504, "step": 399 }, { "epoch": 0.05, "learning_rate": 2.905536696710757e-07, "logits/chosen": -2.2691457271575928, "logits/rejected": -2.476783037185669, "logps/chosen": -314.0921325683594, "logps/rejected": -231.2151336669922, "loss": 0.728, "rewards/accuracies": 0.625, "rewards/chosen": -0.17998142540454865, "rewards/margins": 0.1350838989019394, "rewards/rejected": -0.31506532430648804, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.9051855320145146e-07, "logits/chosen": -3.514326572418213, "logits/rejected": -3.358855724334717, "logps/chosen": -347.7041015625, "logps/rejected": -287.153076171875, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": 0.10987290740013123, "rewards/margins": 1.2267987728118896, "rewards/rejected": -1.116925835609436, "step": 401 }, { "epoch": 0.05, "learning_rate": 2.904834367318272e-07, "logits/chosen": -3.029038190841675, "logits/rejected": -3.052837371826172, "logps/chosen": -172.21746826171875, "logps/rejected": -153.2972869873047, "loss": 0.764, "rewards/accuracies": 0.5, "rewards/chosen": -0.2717152535915375, "rewards/margins": -0.08527862280607224, "rewards/rejected": -0.18643663823604584, "step": 402 }, { "epoch": 0.05, "learning_rate": 2.9044832026220297e-07, "logits/chosen": -3.0615785121917725, "logits/rejected": -3.109618663787842, "logps/chosen": -175.44223022460938, "logps/rejected": -245.84722900390625, "loss": 0.5672, "rewards/accuracies": 0.625, "rewards/chosen": 0.17322120070457458, "rewards/margins": 0.8645422458648682, "rewards/rejected": -0.6913211345672607, "step": 403 }, { "epoch": 0.05, "learning_rate": 2.904132037925787e-07, "logits/chosen": -3.5077638626098633, "logits/rejected": -3.146940231323242, "logps/chosen": -317.1956787109375, "logps/rejected": -176.7849578857422, "loss": 0.3104, "rewards/accuracies": 1.0, "rewards/chosen": 0.2571291923522949, "rewards/margins": 1.1085937023162842, "rewards/rejected": -0.8514645099639893, "step": 404 }, { "epoch": 0.05, "learning_rate": 2.903780873229545e-07, "logits/chosen": -3.766819953918457, "logits/rejected": -3.6343953609466553, "logps/chosen": -375.5865783691406, "logps/rejected": -191.43911743164062, "loss": 0.3751, "rewards/accuracies": 0.875, "rewards/chosen": -0.11596641689538956, "rewards/margins": 1.363093614578247, "rewards/rejected": -1.4790599346160889, "step": 405 }, { "epoch": 0.05, "learning_rate": 2.903429708533302e-07, "logits/chosen": -2.3069400787353516, "logits/rejected": -2.37493634223938, "logps/chosen": -233.0413818359375, "logps/rejected": -187.205322265625, "loss": 0.5951, "rewards/accuracies": 0.625, "rewards/chosen": -0.021038254722952843, "rewards/margins": 0.49389857053756714, "rewards/rejected": -0.5149368047714233, "step": 406 }, { "epoch": 0.05, "learning_rate": 2.9030785438370593e-07, "logits/chosen": -3.210749626159668, "logits/rejected": -3.606515884399414, "logps/chosen": -127.40187072753906, "logps/rejected": -176.50196838378906, "loss": 0.546, "rewards/accuracies": 0.625, "rewards/chosen": 0.005973998457193375, "rewards/margins": 0.5208548903465271, "rewards/rejected": -0.514880895614624, "step": 407 }, { "epoch": 0.05, "learning_rate": 2.902727379140817e-07, "logits/chosen": -3.272876501083374, "logits/rejected": -3.469956874847412, "logps/chosen": -198.27767944335938, "logps/rejected": -197.46701049804688, "loss": 0.4784, "rewards/accuracies": 0.75, "rewards/chosen": -0.10195407271385193, "rewards/margins": 0.6237679719924927, "rewards/rejected": -0.7257220149040222, "step": 408 }, { "epoch": 0.05, "learning_rate": 2.9023762144445744e-07, "logits/chosen": -3.4800057411193848, "logits/rejected": -3.683103561401367, "logps/chosen": -237.06924438476562, "logps/rejected": -269.08050537109375, "loss": 0.4867, "rewards/accuracies": 0.625, "rewards/chosen": -0.04493027180433273, "rewards/margins": 0.7818341255187988, "rewards/rejected": -0.8267643451690674, "step": 409 }, { "epoch": 0.05, "learning_rate": 2.902025049748332e-07, "logits/chosen": -3.3969452381134033, "logits/rejected": -3.2532665729522705, "logps/chosen": -349.688232421875, "logps/rejected": -176.83021545410156, "loss": 0.6842, "rewards/accuracies": 0.5, "rewards/chosen": -0.3673829436302185, "rewards/margins": 0.11574295163154602, "rewards/rejected": -0.48312586545944214, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.901673885052089e-07, "logits/chosen": -2.668060302734375, "logits/rejected": -2.755876064300537, "logps/chosen": -275.920654296875, "logps/rejected": -223.3485870361328, "loss": 0.4811, "rewards/accuracies": 0.75, "rewards/chosen": 0.018153982236981392, "rewards/margins": 0.6309922337532043, "rewards/rejected": -0.6128382682800293, "step": 411 }, { "epoch": 0.05, "learning_rate": 2.901322720355847e-07, "logits/chosen": -2.6855573654174805, "logits/rejected": -2.578275203704834, "logps/chosen": -350.96002197265625, "logps/rejected": -258.6368103027344, "loss": 0.4263, "rewards/accuracies": 0.875, "rewards/chosen": 0.19422507286071777, "rewards/margins": 0.7949941158294678, "rewards/rejected": -0.60076904296875, "step": 412 }, { "epoch": 0.05, "learning_rate": 2.9009715556596046e-07, "logits/chosen": -2.865835189819336, "logits/rejected": -2.7778525352478027, "logps/chosen": -240.86866760253906, "logps/rejected": -297.23870849609375, "loss": 0.4525, "rewards/accuracies": 0.875, "rewards/chosen": 0.22952251136302948, "rewards/margins": 0.7440728545188904, "rewards/rejected": -0.5145503878593445, "step": 413 }, { "epoch": 0.05, "learning_rate": 2.9006203909633616e-07, "logits/chosen": -2.6799004077911377, "logits/rejected": -2.5991106033325195, "logps/chosen": -380.75042724609375, "logps/rejected": -232.90187072753906, "loss": 0.4246, "rewards/accuracies": 1.0, "rewards/chosen": 0.2754175066947937, "rewards/margins": 0.7328726053237915, "rewards/rejected": -0.4574550986289978, "step": 414 }, { "epoch": 0.05, "learning_rate": 2.900269226267119e-07, "logits/chosen": -3.2311782836914062, "logits/rejected": -3.590186834335327, "logps/chosen": -219.71585083007812, "logps/rejected": -264.0310363769531, "loss": 0.2875, "rewards/accuracies": 1.0, "rewards/chosen": 0.27128270268440247, "rewards/margins": 1.4048019647598267, "rewards/rejected": -1.1335194110870361, "step": 415 }, { "epoch": 0.05, "learning_rate": 2.8999180615708766e-07, "logits/chosen": -3.3896942138671875, "logits/rejected": -3.7524070739746094, "logps/chosen": -74.85578918457031, "logps/rejected": -152.80722045898438, "loss": 0.5737, "rewards/accuracies": 0.625, "rewards/chosen": 0.04244138300418854, "rewards/margins": 0.539861798286438, "rewards/rejected": -0.49742045998573303, "step": 416 }, { "epoch": 0.05, "learning_rate": 2.899566896874634e-07, "logits/chosen": -3.9499166011810303, "logits/rejected": -3.4246063232421875, "logps/chosen": -419.5428466796875, "logps/rejected": -234.18382263183594, "loss": 0.465, "rewards/accuracies": 0.75, "rewards/chosen": -0.12455032020807266, "rewards/margins": 0.9598172307014465, "rewards/rejected": -1.0843676328659058, "step": 417 }, { "epoch": 0.05, "learning_rate": 2.8992157321783917e-07, "logits/chosen": -2.850705623626709, "logits/rejected": -2.933830738067627, "logps/chosen": -317.0789794921875, "logps/rejected": -246.9700927734375, "loss": 0.4698, "rewards/accuracies": 0.875, "rewards/chosen": 0.1677778661251068, "rewards/margins": 0.6241986751556396, "rewards/rejected": -0.45642077922821045, "step": 418 }, { "epoch": 0.05, "learning_rate": 2.8988645674821487e-07, "logits/chosen": -2.9594523906707764, "logits/rejected": -2.848459005355835, "logps/chosen": -356.214599609375, "logps/rejected": -246.29466247558594, "loss": 0.4705, "rewards/accuracies": 0.875, "rewards/chosen": 0.0382872074842453, "rewards/margins": 0.6664814949035645, "rewards/rejected": -0.628194272518158, "step": 419 }, { "epoch": 0.05, "learning_rate": 2.8985134027859063e-07, "logits/chosen": -2.657510280609131, "logits/rejected": -2.9935638904571533, "logps/chosen": -147.80377197265625, "logps/rejected": -262.690185546875, "loss": 0.6125, "rewards/accuracies": 0.625, "rewards/chosen": -0.4110000431537628, "rewards/margins": 0.2875770032405853, "rewards/rejected": -0.6985770463943481, "step": 420 }, { "epoch": 0.05, "learning_rate": 2.898162238089664e-07, "logits/chosen": -3.2642645835876465, "logits/rejected": -3.039949893951416, "logps/chosen": -450.27838134765625, "logps/rejected": -256.2910461425781, "loss": 0.6544, "rewards/accuracies": 0.625, "rewards/chosen": -0.032554350793361664, "rewards/margins": 0.5248735547065735, "rewards/rejected": -0.5574278831481934, "step": 421 }, { "epoch": 0.05, "learning_rate": 2.8978110733934213e-07, "logits/chosen": -2.406627893447876, "logits/rejected": -2.6548261642456055, "logps/chosen": -133.6346435546875, "logps/rejected": -142.57772827148438, "loss": 0.695, "rewards/accuracies": 0.375, "rewards/chosen": -0.24219754338264465, "rewards/margins": 0.5127408504486084, "rewards/rejected": -0.7549383640289307, "step": 422 }, { "epoch": 0.05, "learning_rate": 2.897459908697179e-07, "logits/chosen": -2.6494946479797363, "logits/rejected": -2.6429615020751953, "logps/chosen": -501.8548889160156, "logps/rejected": -324.11907958984375, "loss": 0.477, "rewards/accuracies": 0.75, "rewards/chosen": 0.027265071868896484, "rewards/margins": 0.7837241291999817, "rewards/rejected": -0.7564589977264404, "step": 423 }, { "epoch": 0.05, "learning_rate": 2.8971087440009364e-07, "logits/chosen": -2.8341546058654785, "logits/rejected": -3.268275022506714, "logps/chosen": -296.7918701171875, "logps/rejected": -225.91140747070312, "loss": 0.4892, "rewards/accuracies": 0.875, "rewards/chosen": 0.18758806586265564, "rewards/margins": 1.2077127695083618, "rewards/rejected": -1.0201246738433838, "step": 424 }, { "epoch": 0.05, "learning_rate": 2.896757579304694e-07, "logits/chosen": -2.8102188110351562, "logits/rejected": -3.1215076446533203, "logps/chosen": -279.5031433105469, "logps/rejected": -247.31483459472656, "loss": 0.4934, "rewards/accuracies": 0.75, "rewards/chosen": 0.1715620905160904, "rewards/margins": 1.052005410194397, "rewards/rejected": -0.8804432153701782, "step": 425 }, { "epoch": 0.05, "learning_rate": 2.8964064146084515e-07, "logits/chosen": -2.8398847579956055, "logits/rejected": -2.6468255519866943, "logps/chosen": -257.25830078125, "logps/rejected": -268.04266357421875, "loss": 0.5065, "rewards/accuracies": 0.875, "rewards/chosen": 0.03891228884458542, "rewards/margins": 0.7750474214553833, "rewards/rejected": -0.7361351251602173, "step": 426 }, { "epoch": 0.05, "learning_rate": 2.8960552499122085e-07, "logits/chosen": -2.5891106128692627, "logits/rejected": -2.562657117843628, "logps/chosen": -351.991943359375, "logps/rejected": -154.7635040283203, "loss": 0.9136, "rewards/accuracies": 0.5, "rewards/chosen": -0.37533143162727356, "rewards/margins": -0.1489536166191101, "rewards/rejected": -0.22637782990932465, "step": 427 }, { "epoch": 0.05, "learning_rate": 2.895704085215966e-07, "logits/chosen": -3.0758864879608154, "logits/rejected": -3.2650647163391113, "logps/chosen": -310.06512451171875, "logps/rejected": -270.84613037109375, "loss": 0.622, "rewards/accuracies": 0.625, "rewards/chosen": -0.24239929020404816, "rewards/margins": 0.4100295901298523, "rewards/rejected": -0.6524288654327393, "step": 428 }, { "epoch": 0.05, "learning_rate": 2.8953529205197236e-07, "logits/chosen": -3.0198135375976562, "logits/rejected": -3.286731719970703, "logps/chosen": -395.9908447265625, "logps/rejected": -352.3685302734375, "loss": 0.4475, "rewards/accuracies": 0.75, "rewards/chosen": -0.08217925578355789, "rewards/margins": 1.4642651081085205, "rewards/rejected": -1.546444296836853, "step": 429 }, { "epoch": 0.05, "learning_rate": 2.895001755823481e-07, "logits/chosen": -3.727076530456543, "logits/rejected": -3.518991470336914, "logps/chosen": -258.24822998046875, "logps/rejected": -165.10745239257812, "loss": 0.5922, "rewards/accuracies": 0.625, "rewards/chosen": -0.03867786377668381, "rewards/margins": 0.500251829624176, "rewards/rejected": -0.5389297008514404, "step": 430 }, { "epoch": 0.05, "learning_rate": 2.8946505911272387e-07, "logits/chosen": -2.4576897621154785, "logits/rejected": -2.303232192993164, "logps/chosen": -346.661376953125, "logps/rejected": -254.64068603515625, "loss": 0.4757, "rewards/accuracies": 0.875, "rewards/chosen": 0.09284898638725281, "rewards/margins": 0.8508399128913879, "rewards/rejected": -0.7579909563064575, "step": 431 }, { "epoch": 0.05, "learning_rate": 2.8942994264309957e-07, "logits/chosen": -3.332794666290283, "logits/rejected": -3.2342536449432373, "logps/chosen": -187.4974365234375, "logps/rejected": -211.42694091796875, "loss": 0.3537, "rewards/accuracies": 0.875, "rewards/chosen": 0.22367991507053375, "rewards/margins": 1.3280267715454102, "rewards/rejected": -1.1043466329574585, "step": 432 }, { "epoch": 0.05, "learning_rate": 2.893948261734754e-07, "logits/chosen": -3.6305017471313477, "logits/rejected": -3.377901077270508, "logps/chosen": -239.5826416015625, "logps/rejected": -153.404541015625, "loss": 0.5964, "rewards/accuracies": 0.625, "rewards/chosen": -0.4212096333503723, "rewards/margins": 0.5863312482833862, "rewards/rejected": -1.0075409412384033, "step": 433 }, { "epoch": 0.05, "learning_rate": 2.8935970970385113e-07, "logits/chosen": -2.8274264335632324, "logits/rejected": -3.0231239795684814, "logps/chosen": -121.36795043945312, "logps/rejected": -150.42874145507812, "loss": 0.4203, "rewards/accuracies": 0.875, "rewards/chosen": 0.22381076216697693, "rewards/margins": 0.8497442007064819, "rewards/rejected": -0.6259334087371826, "step": 434 }, { "epoch": 0.05, "learning_rate": 2.8932459323422683e-07, "logits/chosen": -3.2575225830078125, "logits/rejected": -3.0279829502105713, "logps/chosen": -239.0912628173828, "logps/rejected": -156.88623046875, "loss": 0.7652, "rewards/accuracies": 0.625, "rewards/chosen": -0.3596770167350769, "rewards/margins": 0.052212025970220566, "rewards/rejected": -0.4118890166282654, "step": 435 }, { "epoch": 0.05, "learning_rate": 2.892894767646026e-07, "logits/chosen": -3.0100252628326416, "logits/rejected": -3.157221555709839, "logps/chosen": -329.5337829589844, "logps/rejected": -218.9036865234375, "loss": 0.5606, "rewards/accuracies": 0.5, "rewards/chosen": -0.0907401591539383, "rewards/margins": 0.6632818579673767, "rewards/rejected": -0.7540220022201538, "step": 436 }, { "epoch": 0.05, "learning_rate": 2.8925436029497834e-07, "logits/chosen": -2.998896360397339, "logits/rejected": -2.773792266845703, "logps/chosen": -307.9561767578125, "logps/rejected": -194.65496826171875, "loss": 0.8778, "rewards/accuracies": 0.375, "rewards/chosen": -0.4579678475856781, "rewards/margins": -0.12219326198101044, "rewards/rejected": -0.33577460050582886, "step": 437 }, { "epoch": 0.05, "learning_rate": 2.892192438253541e-07, "logits/chosen": -2.887543201446533, "logits/rejected": -2.7657833099365234, "logps/chosen": -234.41851806640625, "logps/rejected": -256.7139892578125, "loss": 0.8364, "rewards/accuracies": 0.625, "rewards/chosen": -0.2570115923881531, "rewards/margins": 0.5620640516281128, "rewards/rejected": -0.8190757036209106, "step": 438 }, { "epoch": 0.05, "learning_rate": 2.8918412735572984e-07, "logits/chosen": -2.729084014892578, "logits/rejected": -2.7433485984802246, "logps/chosen": -117.47038269042969, "logps/rejected": -230.05661010742188, "loss": 0.5783, "rewards/accuracies": 0.75, "rewards/chosen": 0.194748193025589, "rewards/margins": 0.6182531118392944, "rewards/rejected": -0.42350485920906067, "step": 439 }, { "epoch": 0.05, "learning_rate": 2.8914901088610555e-07, "logits/chosen": -4.01908540725708, "logits/rejected": -4.057231903076172, "logps/chosen": -319.78985595703125, "logps/rejected": -238.83909606933594, "loss": 0.5, "rewards/accuracies": 0.75, "rewards/chosen": -0.362660676240921, "rewards/margins": 1.0223525762557983, "rewards/rejected": -1.3850133419036865, "step": 440 }, { "epoch": 0.05, "learning_rate": 2.891138944164813e-07, "logits/chosen": -3.1168556213378906, "logits/rejected": -3.4155185222625732, "logps/chosen": -65.08560180664062, "logps/rejected": -252.98086547851562, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": 0.0746397003531456, "rewards/margins": 1.4310288429260254, "rewards/rejected": -1.356389045715332, "step": 441 }, { "epoch": 0.05, "learning_rate": 2.8907877794685705e-07, "logits/chosen": -2.983868360519409, "logits/rejected": -3.1380386352539062, "logps/chosen": -178.57437133789062, "logps/rejected": -239.548095703125, "loss": 0.4207, "rewards/accuracies": 0.75, "rewards/chosen": 0.1182938814163208, "rewards/margins": 1.0859735012054443, "rewards/rejected": -0.9676795601844788, "step": 442 }, { "epoch": 0.05, "learning_rate": 2.890436614772328e-07, "logits/chosen": -3.280564069747925, "logits/rejected": -2.9529080390930176, "logps/chosen": -280.44000244140625, "logps/rejected": -160.4457550048828, "loss": 0.6225, "rewards/accuracies": 0.75, "rewards/chosen": -0.1865798830986023, "rewards/margins": 0.62520432472229, "rewards/rejected": -0.8117842078208923, "step": 443 }, { "epoch": 0.05, "learning_rate": 2.8900854500760856e-07, "logits/chosen": -3.452803611755371, "logits/rejected": -3.4232821464538574, "logps/chosen": -445.9339599609375, "logps/rejected": -183.57379150390625, "loss": 0.5432, "rewards/accuracies": 0.75, "rewards/chosen": -0.06135844439268112, "rewards/margins": 0.416906476020813, "rewards/rejected": -0.4782648980617523, "step": 444 }, { "epoch": 0.05, "learning_rate": 2.8897342853798426e-07, "logits/chosen": -2.914935350418091, "logits/rejected": -2.862522840499878, "logps/chosen": -239.44798278808594, "logps/rejected": -186.4224395751953, "loss": 0.4462, "rewards/accuracies": 0.75, "rewards/chosen": -0.2786865234375, "rewards/margins": 0.7166086435317993, "rewards/rejected": -0.9952951669692993, "step": 445 }, { "epoch": 0.05, "learning_rate": 2.8893831206836007e-07, "logits/chosen": -3.292107582092285, "logits/rejected": -3.110042095184326, "logps/chosen": -227.95521545410156, "logps/rejected": -254.63314819335938, "loss": 0.5106, "rewards/accuracies": 0.625, "rewards/chosen": -0.44962915778160095, "rewards/margins": 0.9801754355430603, "rewards/rejected": -1.4298045635223389, "step": 446 }, { "epoch": 0.05, "learning_rate": 2.889031955987358e-07, "logits/chosen": -2.576687812805176, "logits/rejected": -2.4736924171447754, "logps/chosen": -399.8274230957031, "logps/rejected": -401.70947265625, "loss": 0.5143, "rewards/accuracies": 0.75, "rewards/chosen": -0.04490023851394653, "rewards/margins": 0.5428970456123352, "rewards/rejected": -0.5877972841262817, "step": 447 }, { "epoch": 0.05, "learning_rate": 2.888680791291115e-07, "logits/chosen": -3.1054911613464355, "logits/rejected": -2.8871450424194336, "logps/chosen": -402.8165283203125, "logps/rejected": -269.0899963378906, "loss": 0.5287, "rewards/accuracies": 0.75, "rewards/chosen": -0.02926502376794815, "rewards/margins": 0.8205345273017883, "rewards/rejected": -0.8497995734214783, "step": 448 }, { "epoch": 0.05, "learning_rate": 2.888329626594873e-07, "logits/chosen": -3.2220678329467773, "logits/rejected": -2.916114091873169, "logps/chosen": -328.8043518066406, "logps/rejected": -163.7899932861328, "loss": 0.4446, "rewards/accuracies": 0.75, "rewards/chosen": 0.25431814789772034, "rewards/margins": 0.7400802373886108, "rewards/rejected": -0.4857621192932129, "step": 449 }, { "epoch": 0.05, "learning_rate": 2.8879784618986303e-07, "logits/chosen": -3.117187976837158, "logits/rejected": -3.060683012008667, "logps/chosen": -420.4107666015625, "logps/rejected": -281.23809814453125, "loss": 0.4774, "rewards/accuracies": 0.625, "rewards/chosen": 0.1414695680141449, "rewards/margins": 1.1993730068206787, "rewards/rejected": -1.0579032897949219, "step": 450 }, { "epoch": 0.05, "learning_rate": 2.887627297202388e-07, "logits/chosen": -3.2127084732055664, "logits/rejected": -3.2453420162200928, "logps/chosen": -305.41766357421875, "logps/rejected": -272.09527587890625, "loss": 0.5381, "rewards/accuracies": 0.75, "rewards/chosen": 0.04543359577655792, "rewards/margins": 0.8024728298187256, "rewards/rejected": -0.7570393085479736, "step": 451 }, { "epoch": 0.05, "learning_rate": 2.8872761325061454e-07, "logits/chosen": -3.012664556503296, "logits/rejected": -3.139662742614746, "logps/chosen": -331.30810546875, "logps/rejected": -239.49822998046875, "loss": 0.4507, "rewards/accuracies": 0.75, "rewards/chosen": 0.01095910556614399, "rewards/margins": 1.2323007583618164, "rewards/rejected": -1.221341609954834, "step": 452 }, { "epoch": 0.05, "learning_rate": 2.8869249678099024e-07, "logits/chosen": -2.527912139892578, "logits/rejected": -2.838423252105713, "logps/chosen": -86.77651977539062, "logps/rejected": -169.56405639648438, "loss": 0.747, "rewards/accuracies": 0.5, "rewards/chosen": -0.1546945571899414, "rewards/margins": 0.3063422441482544, "rewards/rejected": -0.4610367715358734, "step": 453 }, { "epoch": 0.05, "learning_rate": 2.88657380311366e-07, "logits/chosen": -2.7566442489624023, "logits/rejected": -2.975444793701172, "logps/chosen": -237.46656799316406, "logps/rejected": -174.21327209472656, "loss": 0.6195, "rewards/accuracies": 0.625, "rewards/chosen": -0.16275857388973236, "rewards/margins": 0.29780369997024536, "rewards/rejected": -0.4605622887611389, "step": 454 }, { "epoch": 0.05, "learning_rate": 2.886222638417418e-07, "logits/chosen": -2.4827194213867188, "logits/rejected": -2.5032083988189697, "logps/chosen": -139.6429901123047, "logps/rejected": -260.80902099609375, "loss": 0.5104, "rewards/accuracies": 0.75, "rewards/chosen": -0.16889016330242157, "rewards/margins": 0.7242395281791687, "rewards/rejected": -0.8931295871734619, "step": 455 }, { "epoch": 0.05, "learning_rate": 2.885871473721175e-07, "logits/chosen": -3.0726680755615234, "logits/rejected": -2.7118992805480957, "logps/chosen": -210.5627899169922, "logps/rejected": -260.4034423828125, "loss": 0.4671, "rewards/accuracies": 0.625, "rewards/chosen": 0.3428042531013489, "rewards/margins": 1.0605111122131348, "rewards/rejected": -0.7177067995071411, "step": 456 }, { "epoch": 0.05, "learning_rate": 2.8855203090249325e-07, "logits/chosen": -2.5457630157470703, "logits/rejected": -2.798626661300659, "logps/chosen": -589.6298828125, "logps/rejected": -407.348388671875, "loss": 0.3411, "rewards/accuracies": 1.0, "rewards/chosen": 0.1833634376525879, "rewards/margins": 0.9715942144393921, "rewards/rejected": -0.7882307171821594, "step": 457 }, { "epoch": 0.05, "learning_rate": 2.88516914432869e-07, "logits/chosen": -2.9093854427337646, "logits/rejected": -2.866203546524048, "logps/chosen": -151.8306884765625, "logps/rejected": -220.07723999023438, "loss": 0.44, "rewards/accuracies": 1.0, "rewards/chosen": 0.02912278287112713, "rewards/margins": 1.054234504699707, "rewards/rejected": -1.0251119136810303, "step": 458 }, { "epoch": 0.05, "learning_rate": 2.8848179796324476e-07, "logits/chosen": -3.245063543319702, "logits/rejected": -3.2865824699401855, "logps/chosen": -169.97947692871094, "logps/rejected": -205.06236267089844, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.20643648505210876, "rewards/margins": 0.8170823454856873, "rewards/rejected": -1.0235188007354736, "step": 459 }, { "epoch": 0.05, "learning_rate": 2.884466814936205e-07, "logits/chosen": -3.3763699531555176, "logits/rejected": -3.4078330993652344, "logps/chosen": -224.6304473876953, "logps/rejected": -199.60533142089844, "loss": 0.6509, "rewards/accuracies": 0.625, "rewards/chosen": -0.20913316309452057, "rewards/margins": 0.22670747339725494, "rewards/rejected": -0.4358406662940979, "step": 460 }, { "epoch": 0.05, "learning_rate": 2.884115650239962e-07, "logits/chosen": -3.5697882175445557, "logits/rejected": -3.449800968170166, "logps/chosen": -329.3570556640625, "logps/rejected": -279.0393371582031, "loss": 0.4055, "rewards/accuracies": 0.875, "rewards/chosen": 0.007454387843608856, "rewards/margins": 1.5074009895324707, "rewards/rejected": -1.4999465942382812, "step": 461 }, { "epoch": 0.05, "learning_rate": 2.8837644855437197e-07, "logits/chosen": -3.077784538269043, "logits/rejected": -3.2358601093292236, "logps/chosen": -402.5267639160156, "logps/rejected": -298.3506774902344, "loss": 0.3972, "rewards/accuracies": 0.75, "rewards/chosen": 0.17414353787899017, "rewards/margins": 1.239652395248413, "rewards/rejected": -1.0655088424682617, "step": 462 }, { "epoch": 0.05, "learning_rate": 2.883413320847477e-07, "logits/chosen": -3.308037042617798, "logits/rejected": -3.1235296726226807, "logps/chosen": -244.96963500976562, "logps/rejected": -194.09091186523438, "loss": 0.5236, "rewards/accuracies": 0.875, "rewards/chosen": -0.013576053082942963, "rewards/margins": 0.47806257009506226, "rewards/rejected": -0.4916386306285858, "step": 463 }, { "epoch": 0.05, "learning_rate": 2.883062156151235e-07, "logits/chosen": -3.2055907249450684, "logits/rejected": -3.123772144317627, "logps/chosen": -234.43942260742188, "logps/rejected": -292.5718688964844, "loss": 0.3388, "rewards/accuracies": 0.875, "rewards/chosen": -0.14657142758369446, "rewards/margins": 1.463843822479248, "rewards/rejected": -1.6104153394699097, "step": 464 }, { "epoch": 0.05, "learning_rate": 2.8827109914549923e-07, "logits/chosen": -2.8594017028808594, "logits/rejected": -2.804487705230713, "logps/chosen": -263.6805725097656, "logps/rejected": -368.3585510253906, "loss": 0.2849, "rewards/accuracies": 1.0, "rewards/chosen": 0.27003681659698486, "rewards/margins": 1.2551006078720093, "rewards/rejected": -0.9850637912750244, "step": 465 }, { "epoch": 0.05, "learning_rate": 2.8823598267587493e-07, "logits/chosen": -2.5701279640197754, "logits/rejected": -2.8582043647766113, "logps/chosen": -138.20327758789062, "logps/rejected": -173.23362731933594, "loss": 0.6408, "rewards/accuracies": 0.625, "rewards/chosen": -0.4296302795410156, "rewards/margins": 0.6491732001304626, "rewards/rejected": -1.0788034200668335, "step": 466 }, { "epoch": 0.05, "learning_rate": 2.8820086620625074e-07, "logits/chosen": -3.6446046829223633, "logits/rejected": -3.315652847290039, "logps/chosen": -245.41732788085938, "logps/rejected": -236.41683959960938, "loss": 0.7151, "rewards/accuracies": 0.625, "rewards/chosen": -0.44958096742630005, "rewards/margins": 0.35197514295578003, "rewards/rejected": -0.8015561103820801, "step": 467 }, { "epoch": 0.05, "learning_rate": 2.881657497366265e-07, "logits/chosen": -3.2038822174072266, "logits/rejected": -2.8926000595092773, "logps/chosen": -456.45208740234375, "logps/rejected": -239.56100463867188, "loss": 0.7909, "rewards/accuracies": 0.625, "rewards/chosen": -0.19957531988620758, "rewards/margins": 0.5295236706733704, "rewards/rejected": -0.7290990352630615, "step": 468 }, { "epoch": 0.05, "learning_rate": 2.881306332670022e-07, "logits/chosen": -3.096151351928711, "logits/rejected": -2.846104621887207, "logps/chosen": -128.83131408691406, "logps/rejected": -205.9220428466797, "loss": 0.4925, "rewards/accuracies": 0.625, "rewards/chosen": -0.26024311780929565, "rewards/margins": 0.8299098610877991, "rewards/rejected": -1.0901530981063843, "step": 469 }, { "epoch": 0.05, "learning_rate": 2.8809551679737795e-07, "logits/chosen": -3.233553886413574, "logits/rejected": -3.096565008163452, "logps/chosen": -69.12287902832031, "logps/rejected": -119.07392883300781, "loss": 0.5257, "rewards/accuracies": 0.625, "rewards/chosen": 0.18879368901252747, "rewards/margins": 0.5164449214935303, "rewards/rejected": -0.3276512324810028, "step": 470 }, { "epoch": 0.05, "learning_rate": 2.880604003277537e-07, "logits/chosen": -3.042024612426758, "logits/rejected": -3.27488374710083, "logps/chosen": -328.2177734375, "logps/rejected": -225.53134155273438, "loss": 0.3151, "rewards/accuracies": 1.0, "rewards/chosen": 0.4705287218093872, "rewards/margins": 2.0412919521331787, "rewards/rejected": -1.570763111114502, "step": 471 }, { "epoch": 0.05, "learning_rate": 2.8802528385812946e-07, "logits/chosen": -3.3470211029052734, "logits/rejected": -3.0811100006103516, "logps/chosen": -418.20745849609375, "logps/rejected": -254.3585205078125, "loss": 0.5836, "rewards/accuracies": 0.75, "rewards/chosen": -0.1571069061756134, "rewards/margins": 0.4423336386680603, "rewards/rejected": -0.5994405746459961, "step": 472 }, { "epoch": 0.05, "learning_rate": 2.879901673885052e-07, "logits/chosen": -3.5778961181640625, "logits/rejected": -3.027993679046631, "logps/chosen": -388.94073486328125, "logps/rejected": -134.12550354003906, "loss": 0.5148, "rewards/accuracies": 0.75, "rewards/chosen": 0.0047159940004348755, "rewards/margins": 0.7017173767089844, "rewards/rejected": -0.6970013380050659, "step": 473 }, { "epoch": 0.05, "learning_rate": 2.879550509188809e-07, "logits/chosen": -2.821014881134033, "logits/rejected": -2.579470634460449, "logps/chosen": -397.657470703125, "logps/rejected": -238.9468536376953, "loss": 0.6333, "rewards/accuracies": 0.5, "rewards/chosen": 0.039274368435144424, "rewards/margins": 0.24432459473609924, "rewards/rejected": -0.2050502449274063, "step": 474 }, { "epoch": 0.05, "learning_rate": 2.8791993444925667e-07, "logits/chosen": -2.7070438861846924, "logits/rejected": -2.735952854156494, "logps/chosen": -226.82904052734375, "logps/rejected": -345.65618896484375, "loss": 0.4714, "rewards/accuracies": 0.75, "rewards/chosen": 0.021621137857437134, "rewards/margins": 1.3002173900604248, "rewards/rejected": -1.27859628200531, "step": 475 }, { "epoch": 0.05, "learning_rate": 2.878848179796324e-07, "logits/chosen": -3.484994649887085, "logits/rejected": -3.8121988773345947, "logps/chosen": -136.73118591308594, "logps/rejected": -263.3206481933594, "loss": 0.4021, "rewards/accuracies": 0.75, "rewards/chosen": 0.025934603065252304, "rewards/margins": 1.8335849046707153, "rewards/rejected": -1.8076503276824951, "step": 476 }, { "epoch": 0.05, "learning_rate": 2.8784970151000817e-07, "logits/chosen": -3.5027527809143066, "logits/rejected": -3.657472848892212, "logps/chosen": -206.80128479003906, "logps/rejected": -266.017822265625, "loss": 0.4624, "rewards/accuracies": 0.75, "rewards/chosen": -0.11314955353736877, "rewards/margins": 1.2086315155029297, "rewards/rejected": -1.321781039237976, "step": 477 }, { "epoch": 0.06, "learning_rate": 2.8781458504038393e-07, "logits/chosen": -3.0798563957214355, "logits/rejected": -2.9016366004943848, "logps/chosen": -360.1195373535156, "logps/rejected": -251.15463256835938, "loss": 0.7105, "rewards/accuracies": 0.5, "rewards/chosen": -0.11658275872468948, "rewards/margins": 0.058493778109550476, "rewards/rejected": -0.17507654428482056, "step": 478 }, { "epoch": 0.06, "learning_rate": 2.877794685707597e-07, "logits/chosen": -3.3471498489379883, "logits/rejected": -3.452831268310547, "logps/chosen": -219.1038818359375, "logps/rejected": -281.0732116699219, "loss": 0.3734, "rewards/accuracies": 0.875, "rewards/chosen": 0.014966841787099838, "rewards/margins": 1.28567636013031, "rewards/rejected": -1.2707096338272095, "step": 479 }, { "epoch": 0.06, "learning_rate": 2.8774435210113543e-07, "logits/chosen": -3.4063801765441895, "logits/rejected": -3.2551231384277344, "logps/chosen": -273.7037353515625, "logps/rejected": -306.4983825683594, "loss": 0.4655, "rewards/accuracies": 0.75, "rewards/chosen": -0.13756990432739258, "rewards/margins": 1.3235504627227783, "rewards/rejected": -1.461120367050171, "step": 480 }, { "epoch": 0.06, "learning_rate": 2.877092356315112e-07, "logits/chosen": -2.888484239578247, "logits/rejected": -2.6733169555664062, "logps/chosen": -214.738525390625, "logps/rejected": -278.33245849609375, "loss": 0.4701, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592889428138733, "rewards/margins": 0.799102246761322, "rewards/rejected": -0.9583912491798401, "step": 481 }, { "epoch": 0.06, "learning_rate": 2.876741191618869e-07, "logits/chosen": -3.345571756362915, "logits/rejected": -3.554896593093872, "logps/chosen": -97.7536392211914, "logps/rejected": -173.86065673828125, "loss": 0.4443, "rewards/accuracies": 0.625, "rewards/chosen": 0.037883177399635315, "rewards/margins": 1.0296493768692017, "rewards/rejected": -0.9917662143707275, "step": 482 }, { "epoch": 0.06, "learning_rate": 2.8763900269226264e-07, "logits/chosen": -3.767484664916992, "logits/rejected": -3.597215175628662, "logps/chosen": -231.55938720703125, "logps/rejected": -170.04779052734375, "loss": 0.671, "rewards/accuracies": 0.75, "rewards/chosen": -0.39047712087631226, "rewards/margins": 0.10898162424564362, "rewards/rejected": -0.4994587302207947, "step": 483 }, { "epoch": 0.06, "learning_rate": 2.876038862226384e-07, "logits/chosen": -3.1447672843933105, "logits/rejected": -3.406238317489624, "logps/chosen": -217.88648986816406, "logps/rejected": -283.2392272949219, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": -0.10717424750328064, "rewards/margins": 1.4090369939804077, "rewards/rejected": -1.5162112712860107, "step": 484 }, { "epoch": 0.06, "learning_rate": 2.8756876975301415e-07, "logits/chosen": -3.735593795776367, "logits/rejected": -3.6610612869262695, "logps/chosen": -208.30088806152344, "logps/rejected": -181.7802734375, "loss": 0.5141, "rewards/accuracies": 0.875, "rewards/chosen": -0.10227297246456146, "rewards/margins": 0.6313957571983337, "rewards/rejected": -0.7336687445640564, "step": 485 }, { "epoch": 0.06, "learning_rate": 2.875336532833899e-07, "logits/chosen": -3.4746227264404297, "logits/rejected": -3.8396947383880615, "logps/chosen": -166.04409790039062, "logps/rejected": -246.57850646972656, "loss": 0.4345, "rewards/accuracies": 0.625, "rewards/chosen": -0.3677878975868225, "rewards/margins": 1.6241183280944824, "rewards/rejected": -1.9919061660766602, "step": 486 }, { "epoch": 0.06, "learning_rate": 2.8749853681376566e-07, "logits/chosen": -2.9436943531036377, "logits/rejected": -3.0604281425476074, "logps/chosen": -207.33624267578125, "logps/rejected": -228.96389770507812, "loss": 0.3463, "rewards/accuracies": 0.75, "rewards/chosen": -0.004196275025606155, "rewards/margins": 1.4612349271774292, "rewards/rejected": -1.4654312133789062, "step": 487 }, { "epoch": 0.06, "learning_rate": 2.8746342034414136e-07, "logits/chosen": -3.0453977584838867, "logits/rejected": -3.0727570056915283, "logps/chosen": -314.0736389160156, "logps/rejected": -272.1146240234375, "loss": 0.4917, "rewards/accuracies": 0.5, "rewards/chosen": -0.19971731305122375, "rewards/margins": 1.133684515953064, "rewards/rejected": -1.3334019184112549, "step": 488 }, { "epoch": 0.06, "learning_rate": 2.8742830387451717e-07, "logits/chosen": -2.878746509552002, "logits/rejected": -2.851273536682129, "logps/chosen": -367.4239501953125, "logps/rejected": -384.2108459472656, "loss": 0.3097, "rewards/accuracies": 1.0, "rewards/chosen": 0.1495692878961563, "rewards/margins": 1.4559946060180664, "rewards/rejected": -1.3064253330230713, "step": 489 }, { "epoch": 0.06, "learning_rate": 2.8739318740489287e-07, "logits/chosen": -1.9737764596939087, "logits/rejected": -2.142622470855713, "logps/chosen": -312.28680419921875, "logps/rejected": -245.7939453125, "loss": 0.4212, "rewards/accuracies": 0.625, "rewards/chosen": 0.08468474447727203, "rewards/margins": 0.9715059995651245, "rewards/rejected": -0.8868212699890137, "step": 490 }, { "epoch": 0.06, "learning_rate": 2.873580709352686e-07, "logits/chosen": -3.0486574172973633, "logits/rejected": -2.7361245155334473, "logps/chosen": -192.8045654296875, "logps/rejected": -210.4586181640625, "loss": 0.9373, "rewards/accuracies": 0.5, "rewards/chosen": -0.7772664427757263, "rewards/margins": -0.05737540125846863, "rewards/rejected": -0.7198910117149353, "step": 491 }, { "epoch": 0.06, "learning_rate": 2.873229544656444e-07, "logits/chosen": -2.6409361362457275, "logits/rejected": -2.6498703956604004, "logps/chosen": -144.90940856933594, "logps/rejected": -239.55465698242188, "loss": 0.4943, "rewards/accuracies": 0.75, "rewards/chosen": 0.034305423498153687, "rewards/margins": 0.7900944352149963, "rewards/rejected": -0.755789041519165, "step": 492 }, { "epoch": 0.06, "learning_rate": 2.8728783799602013e-07, "logits/chosen": -3.17602801322937, "logits/rejected": -3.413388252258301, "logps/chosen": -202.27537536621094, "logps/rejected": -281.008544921875, "loss": 0.3236, "rewards/accuracies": 1.0, "rewards/chosen": 0.2670978307723999, "rewards/margins": 1.3742003440856934, "rewards/rejected": -1.1071025133132935, "step": 493 }, { "epoch": 0.06, "learning_rate": 2.872527215263959e-07, "logits/chosen": -3.530827522277832, "logits/rejected": -3.375345230102539, "logps/chosen": -304.483642578125, "logps/rejected": -319.90087890625, "loss": 0.5451, "rewards/accuracies": 0.625, "rewards/chosen": -0.22065424919128418, "rewards/margins": 0.6409857273101807, "rewards/rejected": -0.8616399765014648, "step": 494 }, { "epoch": 0.06, "learning_rate": 2.8721760505677164e-07, "logits/chosen": -3.3144350051879883, "logits/rejected": -3.124290704727173, "logps/chosen": -162.87513732910156, "logps/rejected": -182.17413330078125, "loss": 0.5875, "rewards/accuracies": 0.75, "rewards/chosen": 0.21477529406547546, "rewards/margins": 0.6383110880851746, "rewards/rejected": -0.4235357642173767, "step": 495 }, { "epoch": 0.06, "learning_rate": 2.8718248858714734e-07, "logits/chosen": -3.4169209003448486, "logits/rejected": -3.2275640964508057, "logps/chosen": -135.8197784423828, "logps/rejected": -142.04396057128906, "loss": 0.5799, "rewards/accuracies": 0.625, "rewards/chosen": -0.016947001218795776, "rewards/margins": 0.509728729724884, "rewards/rejected": -0.5266757011413574, "step": 496 }, { "epoch": 0.06, "learning_rate": 2.871473721175231e-07, "logits/chosen": -3.227363109588623, "logits/rejected": -3.611828088760376, "logps/chosen": -159.49276733398438, "logps/rejected": -256.2635803222656, "loss": 0.4354, "rewards/accuracies": 0.625, "rewards/chosen": 0.09774680435657501, "rewards/margins": 1.3314770460128784, "rewards/rejected": -1.2337303161621094, "step": 497 }, { "epoch": 0.06, "learning_rate": 2.8711225564789885e-07, "logits/chosen": -2.992539644241333, "logits/rejected": -3.15537691116333, "logps/chosen": -284.51776123046875, "logps/rejected": -227.77810668945312, "loss": 0.4464, "rewards/accuracies": 0.75, "rewards/chosen": 0.1649443805217743, "rewards/margins": 1.209691047668457, "rewards/rejected": -1.04474675655365, "step": 498 }, { "epoch": 0.06, "learning_rate": 2.870771391782746e-07, "logits/chosen": -3.039200782775879, "logits/rejected": -2.9331090450286865, "logps/chosen": -405.0827941894531, "logps/rejected": -371.2900390625, "loss": 0.6963, "rewards/accuracies": 0.75, "rewards/chosen": -0.39746686816215515, "rewards/margins": 0.8559498190879822, "rewards/rejected": -1.253416657447815, "step": 499 }, { "epoch": 0.06, "learning_rate": 2.8704202270865035e-07, "logits/chosen": -2.8560547828674316, "logits/rejected": -3.090045690536499, "logps/chosen": -227.1024169921875, "logps/rejected": -187.82977294921875, "loss": 0.766, "rewards/accuracies": 0.625, "rewards/chosen": -0.19129395484924316, "rewards/margins": 0.482562780380249, "rewards/rejected": -0.673856794834137, "step": 500 }, { "epoch": 0.06, "learning_rate": 2.870069062390261e-07, "logits/chosen": -2.5864219665527344, "logits/rejected": -2.5727272033691406, "logps/chosen": -302.9307861328125, "logps/rejected": -211.59548950195312, "loss": 0.3917, "rewards/accuracies": 0.875, "rewards/chosen": 0.19512784481048584, "rewards/margins": 0.9747135043144226, "rewards/rejected": -0.7795856595039368, "step": 501 }, { "epoch": 0.06, "learning_rate": 2.8697178976940186e-07, "logits/chosen": -2.7894229888916016, "logits/rejected": -2.7230677604675293, "logps/chosen": -189.9849853515625, "logps/rejected": -156.8744659423828, "loss": 0.6785, "rewards/accuracies": 0.5, "rewards/chosen": 0.12064714729785919, "rewards/margins": 0.13896769285202026, "rewards/rejected": -0.018320566043257713, "step": 502 }, { "epoch": 0.06, "learning_rate": 2.869366732997776e-07, "logits/chosen": -2.541473627090454, "logits/rejected": -2.7297348976135254, "logps/chosen": -264.6041259765625, "logps/rejected": -324.25714111328125, "loss": 0.41, "rewards/accuracies": 0.75, "rewards/chosen": 0.16816501319408417, "rewards/margins": 1.1273730993270874, "rewards/rejected": -0.959208071231842, "step": 503 }, { "epoch": 0.06, "learning_rate": 2.869015568301533e-07, "logits/chosen": -2.9365227222442627, "logits/rejected": -3.1242902278900146, "logps/chosen": -125.56317901611328, "logps/rejected": -234.14224243164062, "loss": 0.3605, "rewards/accuracies": 0.75, "rewards/chosen": -0.06341756880283356, "rewards/margins": 1.1993844509124756, "rewards/rejected": -1.262802004814148, "step": 504 }, { "epoch": 0.06, "learning_rate": 2.8686644036052907e-07, "logits/chosen": -2.867974281311035, "logits/rejected": -2.9811301231384277, "logps/chosen": -198.185791015625, "logps/rejected": -329.2797546386719, "loss": 0.6382, "rewards/accuracies": 0.75, "rewards/chosen": -0.25616970658302307, "rewards/margins": 0.24100124835968018, "rewards/rejected": -0.49717098474502563, "step": 505 }, { "epoch": 0.06, "learning_rate": 2.868313238909048e-07, "logits/chosen": -3.55409574508667, "logits/rejected": -3.3509416580200195, "logps/chosen": -486.7705078125, "logps/rejected": -302.6485595703125, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -0.12468338757753372, "rewards/margins": 1.0725276470184326, "rewards/rejected": -1.1972111463546753, "step": 506 }, { "epoch": 0.06, "learning_rate": 2.867962074212806e-07, "logits/chosen": -2.9114534854888916, "logits/rejected": -2.97478985786438, "logps/chosen": -277.025390625, "logps/rejected": -264.81427001953125, "loss": 0.4664, "rewards/accuracies": 0.625, "rewards/chosen": -0.11956827342510223, "rewards/margins": 1.0786495208740234, "rewards/rejected": -1.1982176303863525, "step": 507 }, { "epoch": 0.06, "learning_rate": 2.8676109095165633e-07, "logits/chosen": -2.5504937171936035, "logits/rejected": -2.4858078956604004, "logps/chosen": -231.9882354736328, "logps/rejected": -273.6965637207031, "loss": 0.402, "rewards/accuracies": 1.0, "rewards/chosen": 0.19226795434951782, "rewards/margins": 0.802055835723877, "rewards/rejected": -0.6097878813743591, "step": 508 }, { "epoch": 0.06, "learning_rate": 2.8672597448203203e-07, "logits/chosen": -3.050672769546509, "logits/rejected": -2.942763328552246, "logps/chosen": -361.748779296875, "logps/rejected": -303.2430725097656, "loss": 0.5529, "rewards/accuracies": 0.75, "rewards/chosen": 0.1266341209411621, "rewards/margins": 0.8433701992034912, "rewards/rejected": -0.7167360782623291, "step": 509 }, { "epoch": 0.06, "learning_rate": 2.866908580124078e-07, "logits/chosen": -3.014805555343628, "logits/rejected": -3.192039966583252, "logps/chosen": -167.71812438964844, "logps/rejected": -253.79908752441406, "loss": 0.3678, "rewards/accuracies": 0.875, "rewards/chosen": -0.11787936091423035, "rewards/margins": 1.4172860383987427, "rewards/rejected": -1.535165548324585, "step": 510 }, { "epoch": 0.06, "learning_rate": 2.866557415427836e-07, "logits/chosen": -3.1891798973083496, "logits/rejected": -3.3985683917999268, "logps/chosen": -395.2935791015625, "logps/rejected": -233.57791137695312, "loss": 0.4813, "rewards/accuracies": 0.875, "rewards/chosen": 0.03097468614578247, "rewards/margins": 0.8894357681274414, "rewards/rejected": -0.8584611415863037, "step": 511 }, { "epoch": 0.06, "learning_rate": 2.866206250731593e-07, "logits/chosen": -2.7636375427246094, "logits/rejected": -2.7309622764587402, "logps/chosen": -188.4740447998047, "logps/rejected": -235.02081298828125, "loss": 0.5118, "rewards/accuracies": 0.75, "rewards/chosen": -0.12039600312709808, "rewards/margins": 0.714372456073761, "rewards/rejected": -0.8347684741020203, "step": 512 }, { "epoch": 0.06, "learning_rate": 2.8658550860353505e-07, "logits/chosen": -3.4364349842071533, "logits/rejected": -3.791431188583374, "logps/chosen": -148.3749237060547, "logps/rejected": -172.4364013671875, "loss": 0.6647, "rewards/accuracies": 0.5, "rewards/chosen": -0.47247448563575745, "rewards/margins": 0.7560961246490479, "rewards/rejected": -1.228570580482483, "step": 513 }, { "epoch": 0.06, "learning_rate": 2.865503921339108e-07, "logits/chosen": -3.0675241947174072, "logits/rejected": -3.2901792526245117, "logps/chosen": -213.7919464111328, "logps/rejected": -295.42535400390625, "loss": 0.6086, "rewards/accuracies": 0.75, "rewards/chosen": -0.11407937109470367, "rewards/margins": 0.9246984720230103, "rewards/rejected": -1.0387778282165527, "step": 514 }, { "epoch": 0.06, "learning_rate": 2.8651527566428656e-07, "logits/chosen": -3.1039838790893555, "logits/rejected": -3.328001022338867, "logps/chosen": -262.2862243652344, "logps/rejected": -229.57106018066406, "loss": 0.4118, "rewards/accuracies": 0.75, "rewards/chosen": 0.44690725207328796, "rewards/margins": 1.019927740097046, "rewards/rejected": -0.5730204582214355, "step": 515 }, { "epoch": 0.06, "learning_rate": 2.864801591946623e-07, "logits/chosen": -2.241164207458496, "logits/rejected": -2.1829233169555664, "logps/chosen": -220.9943389892578, "logps/rejected": -276.4043273925781, "loss": 0.7219, "rewards/accuracies": 0.625, "rewards/chosen": -0.12687012553215027, "rewards/margins": 0.48822373151779175, "rewards/rejected": -0.6150938868522644, "step": 516 }, { "epoch": 0.06, "learning_rate": 2.86445042725038e-07, "logits/chosen": -3.601105213165283, "logits/rejected": -3.429993152618408, "logps/chosen": -231.45730590820312, "logps/rejected": -247.46238708496094, "loss": 0.5065, "rewards/accuracies": 0.75, "rewards/chosen": -0.12420734018087387, "rewards/margins": 0.8599831461906433, "rewards/rejected": -0.9841905236244202, "step": 517 }, { "epoch": 0.06, "learning_rate": 2.8640992625541376e-07, "logits/chosen": -2.833491802215576, "logits/rejected": -2.5983874797821045, "logps/chosen": -291.8511657714844, "logps/rejected": -302.2479248046875, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": 0.2806636691093445, "rewards/margins": 1.7790451049804688, "rewards/rejected": -1.4983816146850586, "step": 518 }, { "epoch": 0.06, "learning_rate": 2.863748097857895e-07, "logits/chosen": -2.8953664302825928, "logits/rejected": -3.1274290084838867, "logps/chosen": -540.9337768554688, "logps/rejected": -346.3360290527344, "loss": 0.4475, "rewards/accuracies": 0.875, "rewards/chosen": 0.1636904776096344, "rewards/margins": 0.6888893842697144, "rewards/rejected": -0.5251989364624023, "step": 519 }, { "epoch": 0.06, "learning_rate": 2.8633969331616527e-07, "logits/chosen": -3.1299099922180176, "logits/rejected": -3.3590762615203857, "logps/chosen": -162.1237335205078, "logps/rejected": -263.94158935546875, "loss": 0.3476, "rewards/accuracies": 0.75, "rewards/chosen": 0.23379351198673248, "rewards/margins": 1.5890848636627197, "rewards/rejected": -1.3552911281585693, "step": 520 }, { "epoch": 0.06, "learning_rate": 2.86304576846541e-07, "logits/chosen": -3.154480457305908, "logits/rejected": -2.8855338096618652, "logps/chosen": -227.01904296875, "logps/rejected": -137.81228637695312, "loss": 0.7626, "rewards/accuracies": 0.5, "rewards/chosen": -0.43410974740982056, "rewards/margins": 0.21767541766166687, "rewards/rejected": -0.651785135269165, "step": 521 }, { "epoch": 0.06, "learning_rate": 2.862694603769167e-07, "logits/chosen": -3.1889216899871826, "logits/rejected": -3.2298495769500732, "logps/chosen": -328.3163757324219, "logps/rejected": -260.7103576660156, "loss": 0.4064, "rewards/accuracies": 0.875, "rewards/chosen": -0.02139928936958313, "rewards/margins": 0.8407351970672607, "rewards/rejected": -0.8621345162391663, "step": 522 }, { "epoch": 0.06, "learning_rate": 2.8623434390729253e-07, "logits/chosen": -2.878713607788086, "logits/rejected": -2.793660879135132, "logps/chosen": -276.17645263671875, "logps/rejected": -229.63475036621094, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -0.035965923219919205, "rewards/margins": 0.6465068459510803, "rewards/rejected": -0.6824727058410645, "step": 523 }, { "epoch": 0.06, "learning_rate": 2.861992274376683e-07, "logits/chosen": -3.0597634315490723, "logits/rejected": -2.809943914413452, "logps/chosen": -224.89942932128906, "logps/rejected": -279.1510314941406, "loss": 0.5128, "rewards/accuracies": 0.875, "rewards/chosen": 0.17489421367645264, "rewards/margins": 0.5333244204521179, "rewards/rejected": -0.3584301769733429, "step": 524 }, { "epoch": 0.06, "learning_rate": 2.86164110968044e-07, "logits/chosen": -2.7586112022399902, "logits/rejected": -2.7360591888427734, "logps/chosen": -281.439697265625, "logps/rejected": -299.2528076171875, "loss": 0.5781, "rewards/accuracies": 0.875, "rewards/chosen": 0.05590415000915527, "rewards/margins": 0.2985103130340576, "rewards/rejected": -0.24260616302490234, "step": 525 }, { "epoch": 0.06, "learning_rate": 2.8612899449841974e-07, "logits/chosen": -3.474358558654785, "logits/rejected": -3.768622875213623, "logps/chosen": -78.41242218017578, "logps/rejected": -221.89361572265625, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": 0.07666325569152832, "rewards/margins": 2.6205077171325684, "rewards/rejected": -2.54384446144104, "step": 526 }, { "epoch": 0.06, "learning_rate": 2.860938780287955e-07, "logits/chosen": -3.536370277404785, "logits/rejected": -3.3360824584960938, "logps/chosen": -185.61795043945312, "logps/rejected": -164.88418579101562, "loss": 0.704, "rewards/accuracies": 0.625, "rewards/chosen": -0.2838400602340698, "rewards/margins": 0.46942323446273804, "rewards/rejected": -0.7532632350921631, "step": 527 }, { "epoch": 0.06, "learning_rate": 2.8605876155917125e-07, "logits/chosen": -2.358625888824463, "logits/rejected": -2.5217840671539307, "logps/chosen": -320.2193298339844, "logps/rejected": -237.72866821289062, "loss": 0.5965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2599813640117645, "rewards/margins": 0.35364556312561035, "rewards/rejected": -0.6136269569396973, "step": 528 }, { "epoch": 0.06, "learning_rate": 2.86023645089547e-07, "logits/chosen": -3.1081275939941406, "logits/rejected": -3.059229612350464, "logps/chosen": -369.98455810546875, "logps/rejected": -276.3141174316406, "loss": 0.544, "rewards/accuracies": 0.625, "rewards/chosen": -0.12163963913917542, "rewards/margins": 0.72295743227005, "rewards/rejected": -0.8445970416069031, "step": 529 }, { "epoch": 0.06, "learning_rate": 2.859885286199227e-07, "logits/chosen": -3.38150691986084, "logits/rejected": -3.1757264137268066, "logps/chosen": -177.9127655029297, "logps/rejected": -224.57460021972656, "loss": 0.7702, "rewards/accuracies": 0.5, "rewards/chosen": -0.49800533056259155, "rewards/margins": 0.42585068941116333, "rewards/rejected": -0.9238559603691101, "step": 530 }, { "epoch": 0.06, "learning_rate": 2.8595341215029846e-07, "logits/chosen": -3.8444156646728516, "logits/rejected": -3.6734280586242676, "logps/chosen": -104.33226776123047, "logps/rejected": -82.72016143798828, "loss": 0.6482, "rewards/accuracies": 0.625, "rewards/chosen": -0.14879372715950012, "rewards/margins": 0.3363971710205078, "rewards/rejected": -0.48519089818000793, "step": 531 }, { "epoch": 0.06, "learning_rate": 2.859182956806742e-07, "logits/chosen": -2.9359076023101807, "logits/rejected": -3.0449743270874023, "logps/chosen": -228.01751708984375, "logps/rejected": -180.79644775390625, "loss": 0.6173, "rewards/accuracies": 0.625, "rewards/chosen": -0.2374809980392456, "rewards/margins": 0.3511824905872345, "rewards/rejected": -0.5886634588241577, "step": 532 }, { "epoch": 0.06, "learning_rate": 2.8588317921104997e-07, "logits/chosen": -2.995626449584961, "logits/rejected": -2.8867599964141846, "logps/chosen": -323.6828918457031, "logps/rejected": -179.494873046875, "loss": 0.352, "rewards/accuracies": 0.875, "rewards/chosen": 0.5366920232772827, "rewards/margins": 1.1864476203918457, "rewards/rejected": -0.6497554779052734, "step": 533 }, { "epoch": 0.06, "learning_rate": 2.858480627414257e-07, "logits/chosen": -3.669623613357544, "logits/rejected": -3.7601470947265625, "logps/chosen": -500.26806640625, "logps/rejected": -318.5075988769531, "loss": 0.4036, "rewards/accuracies": 0.75, "rewards/chosen": -0.4221084713935852, "rewards/margins": 1.10366690158844, "rewards/rejected": -1.52577543258667, "step": 534 }, { "epoch": 0.06, "learning_rate": 2.8581294627180147e-07, "logits/chosen": -3.1554794311523438, "logits/rejected": -3.3461742401123047, "logps/chosen": -462.300537109375, "logps/rejected": -228.64552307128906, "loss": 0.702, "rewards/accuracies": 0.625, "rewards/chosen": -0.20671509206295013, "rewards/margins": 0.4693266749382019, "rewards/rejected": -0.6760417819023132, "step": 535 }, { "epoch": 0.06, "learning_rate": 2.8577782980217723e-07, "logits/chosen": -2.9409327507019043, "logits/rejected": -2.8004651069641113, "logps/chosen": -169.707275390625, "logps/rejected": -261.3868713378906, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": 0.016493968665599823, "rewards/margins": 1.1234095096588135, "rewards/rejected": -1.1069154739379883, "step": 536 }, { "epoch": 0.06, "learning_rate": 2.85742713332553e-07, "logits/chosen": -3.4502832889556885, "logits/rejected": -3.249462604522705, "logps/chosen": -448.35784912109375, "logps/rejected": -448.76495361328125, "loss": 0.4964, "rewards/accuracies": 0.75, "rewards/chosen": 0.04303455352783203, "rewards/margins": 0.6656639575958252, "rewards/rejected": -0.6226294040679932, "step": 537 }, { "epoch": 0.06, "learning_rate": 2.857075968629287e-07, "logits/chosen": -3.094134569168091, "logits/rejected": -3.204838991165161, "logps/chosen": -92.15776062011719, "logps/rejected": -279.9194030761719, "loss": 0.6079, "rewards/accuracies": 0.5, "rewards/chosen": 0.08525685966014862, "rewards/margins": 0.5060462951660156, "rewards/rejected": -0.4207894802093506, "step": 538 }, { "epoch": 0.06, "learning_rate": 2.8567248039330444e-07, "logits/chosen": -3.061634063720703, "logits/rejected": -3.2320733070373535, "logps/chosen": -343.7528991699219, "logps/rejected": -294.998046875, "loss": 0.851, "rewards/accuracies": 0.5, "rewards/chosen": -0.45737898349761963, "rewards/margins": 0.07155458629131317, "rewards/rejected": -0.5289335250854492, "step": 539 }, { "epoch": 0.06, "learning_rate": 2.856373639236802e-07, "logits/chosen": -2.809377670288086, "logits/rejected": -3.2030439376831055, "logps/chosen": -330.2304992675781, "logps/rejected": -213.47561645507812, "loss": 0.5892, "rewards/accuracies": 0.75, "rewards/chosen": 0.0012353844940662384, "rewards/margins": 0.47216248512268066, "rewards/rejected": -0.4709271192550659, "step": 540 }, { "epoch": 0.06, "learning_rate": 2.8560224745405594e-07, "logits/chosen": -2.3284294605255127, "logits/rejected": -2.863255023956299, "logps/chosen": -535.4571533203125, "logps/rejected": -307.80303955078125, "loss": 0.5115, "rewards/accuracies": 0.5, "rewards/chosen": -0.3536966145038605, "rewards/margins": 0.9567894339561462, "rewards/rejected": -1.310486078262329, "step": 541 }, { "epoch": 0.06, "learning_rate": 2.855671309844317e-07, "logits/chosen": -2.8721399307250977, "logits/rejected": -2.9318065643310547, "logps/chosen": -305.16949462890625, "logps/rejected": -284.3133239746094, "loss": 0.4169, "rewards/accuracies": 0.75, "rewards/chosen": -0.06329341232776642, "rewards/margins": 1.091465711593628, "rewards/rejected": -1.154759168624878, "step": 542 }, { "epoch": 0.06, "learning_rate": 2.855320145148074e-07, "logits/chosen": -3.7321062088012695, "logits/rejected": -2.9591822624206543, "logps/chosen": -475.44415283203125, "logps/rejected": -153.30538940429688, "loss": 0.382, "rewards/accuracies": 1.0, "rewards/chosen": -0.15792134404182434, "rewards/margins": 0.8902776837348938, "rewards/rejected": -1.048198938369751, "step": 543 }, { "epoch": 0.06, "learning_rate": 2.8549689804518315e-07, "logits/chosen": -2.7988762855529785, "logits/rejected": -2.767940044403076, "logps/chosen": -243.82025146484375, "logps/rejected": -266.5516357421875, "loss": 0.3299, "rewards/accuracies": 0.875, "rewards/chosen": 0.21011295914649963, "rewards/margins": 1.3443522453308105, "rewards/rejected": -1.1342393159866333, "step": 544 }, { "epoch": 0.06, "learning_rate": 2.8546178157555896e-07, "logits/chosen": -3.634690999984741, "logits/rejected": -3.778970241546631, "logps/chosen": -319.986083984375, "logps/rejected": -306.7301025390625, "loss": 0.337, "rewards/accuracies": 0.875, "rewards/chosen": -0.0906183123588562, "rewards/margins": 1.3068578243255615, "rewards/rejected": -1.3974761962890625, "step": 545 }, { "epoch": 0.06, "learning_rate": 2.8542666510593466e-07, "logits/chosen": -2.377859354019165, "logits/rejected": -2.68601655960083, "logps/chosen": -151.09364318847656, "logps/rejected": -230.4820556640625, "loss": 0.4721, "rewards/accuracies": 0.875, "rewards/chosen": -0.012478187680244446, "rewards/margins": 0.777976393699646, "rewards/rejected": -0.790454626083374, "step": 546 }, { "epoch": 0.06, "learning_rate": 2.853915486363104e-07, "logits/chosen": -3.0171828269958496, "logits/rejected": -2.935272216796875, "logps/chosen": -210.77883911132812, "logps/rejected": -266.1453552246094, "loss": 0.4129, "rewards/accuracies": 0.875, "rewards/chosen": 0.11610323935747147, "rewards/margins": 1.0423280000686646, "rewards/rejected": -0.9262247085571289, "step": 547 }, { "epoch": 0.06, "learning_rate": 2.8535643216668617e-07, "logits/chosen": -2.657559871673584, "logits/rejected": -2.763437032699585, "logps/chosen": -147.03570556640625, "logps/rejected": -237.77920532226562, "loss": 0.4567, "rewards/accuracies": 0.75, "rewards/chosen": 0.39586618542671204, "rewards/margins": 0.6470467448234558, "rewards/rejected": -0.2511805593967438, "step": 548 }, { "epoch": 0.06, "learning_rate": 2.853213156970619e-07, "logits/chosen": -3.5417895317077637, "logits/rejected": -3.018122911453247, "logps/chosen": -303.20257568359375, "logps/rejected": -224.96017456054688, "loss": 0.3204, "rewards/accuracies": 0.875, "rewards/chosen": 0.184012770652771, "rewards/margins": 1.318422555923462, "rewards/rejected": -1.134409785270691, "step": 549 }, { "epoch": 0.06, "learning_rate": 2.852861992274377e-07, "logits/chosen": -2.5790486335754395, "logits/rejected": -2.585319995880127, "logps/chosen": -255.1060791015625, "logps/rejected": -236.447265625, "loss": 0.7813, "rewards/accuracies": 0.375, "rewards/chosen": -0.5112833380699158, "rewards/margins": -0.0824710801243782, "rewards/rejected": -0.4288122057914734, "step": 550 }, { "epoch": 0.06, "learning_rate": 2.852510827578134e-07, "logits/chosen": -2.172697067260742, "logits/rejected": -2.159735918045044, "logps/chosen": -415.64697265625, "logps/rejected": -255.6728973388672, "loss": 0.6275, "rewards/accuracies": 0.75, "rewards/chosen": 0.10474257916212082, "rewards/margins": 0.5209324359893799, "rewards/rejected": -0.41618984937667847, "step": 551 }, { "epoch": 0.06, "learning_rate": 2.8521596628818913e-07, "logits/chosen": -3.2614693641662598, "logits/rejected": -3.245962619781494, "logps/chosen": -244.94973754882812, "logps/rejected": -194.36509704589844, "loss": 0.4547, "rewards/accuracies": 0.75, "rewards/chosen": -0.1512511819601059, "rewards/margins": 1.0718021392822266, "rewards/rejected": -1.223053216934204, "step": 552 }, { "epoch": 0.06, "learning_rate": 2.851808498185649e-07, "logits/chosen": -3.158853530883789, "logits/rejected": -3.2057247161865234, "logps/chosen": -360.01898193359375, "logps/rejected": -309.0932922363281, "loss": 0.584, "rewards/accuracies": 0.75, "rewards/chosen": -0.16631008684635162, "rewards/margins": 1.0384345054626465, "rewards/rejected": -1.204744577407837, "step": 553 }, { "epoch": 0.06, "learning_rate": 2.8514573334894064e-07, "logits/chosen": -3.1241519451141357, "logits/rejected": -3.2429823875427246, "logps/chosen": -310.84832763671875, "logps/rejected": -270.1993713378906, "loss": 0.5723, "rewards/accuracies": 0.75, "rewards/chosen": -0.03085634857416153, "rewards/margins": 0.45363301038742065, "rewards/rejected": -0.4844893515110016, "step": 554 }, { "epoch": 0.06, "learning_rate": 2.851106168793164e-07, "logits/chosen": -2.5227901935577393, "logits/rejected": -2.5692367553710938, "logps/chosen": -154.5811004638672, "logps/rejected": -177.42520141601562, "loss": 0.5283, "rewards/accuracies": 0.625, "rewards/chosen": -0.22822032868862152, "rewards/margins": 0.5128406286239624, "rewards/rejected": -0.7410610318183899, "step": 555 }, { "epoch": 0.06, "learning_rate": 2.8507550040969215e-07, "logits/chosen": -2.639172315597534, "logits/rejected": -2.875760078430176, "logps/chosen": -213.1607208251953, "logps/rejected": -234.0604248046875, "loss": 0.4478, "rewards/accuracies": 0.875, "rewards/chosen": 0.15493053197860718, "rewards/margins": 0.7433271408081055, "rewards/rejected": -0.5883966684341431, "step": 556 }, { "epoch": 0.06, "learning_rate": 2.850403839400679e-07, "logits/chosen": -2.6481471061706543, "logits/rejected": -2.764463424682617, "logps/chosen": -134.46688842773438, "logps/rejected": -163.86424255371094, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -0.42262765765190125, "rewards/margins": 0.8071228861808777, "rewards/rejected": -1.2297505140304565, "step": 557 }, { "epoch": 0.06, "learning_rate": 2.8500526747044365e-07, "logits/chosen": -3.5028622150421143, "logits/rejected": -3.6843693256378174, "logps/chosen": -335.29046630859375, "logps/rejected": -299.91973876953125, "loss": 0.6575, "rewards/accuracies": 0.5, "rewards/chosen": -0.43093204498291016, "rewards/margins": 0.5989680290222168, "rewards/rejected": -1.029900074005127, "step": 558 }, { "epoch": 0.06, "learning_rate": 2.8497015100081935e-07, "logits/chosen": -2.535102128982544, "logits/rejected": -2.6182827949523926, "logps/chosen": -164.6511688232422, "logps/rejected": -272.6824035644531, "loss": 0.7012, "rewards/accuracies": 0.875, "rewards/chosen": 0.08056869357824326, "rewards/margins": 0.9240962266921997, "rewards/rejected": -0.8435275554656982, "step": 559 }, { "epoch": 0.06, "learning_rate": 2.849350345311951e-07, "logits/chosen": -2.699617624282837, "logits/rejected": -2.6587982177734375, "logps/chosen": -246.6691131591797, "logps/rejected": -273.34625244140625, "loss": 0.5611, "rewards/accuracies": 0.75, "rewards/chosen": 0.10043494403362274, "rewards/margins": 0.642559289932251, "rewards/rejected": -0.5421243906021118, "step": 560 }, { "epoch": 0.06, "learning_rate": 2.8489991806157086e-07, "logits/chosen": -3.2646660804748535, "logits/rejected": -3.3632395267486572, "logps/chosen": -347.6244201660156, "logps/rejected": -384.0805358886719, "loss": 0.6342, "rewards/accuracies": 0.625, "rewards/chosen": -0.3999229371547699, "rewards/margins": 0.5680125951766968, "rewards/rejected": -0.9679355621337891, "step": 561 }, { "epoch": 0.06, "learning_rate": 2.848648015919466e-07, "logits/chosen": -2.572514533996582, "logits/rejected": -2.8451180458068848, "logps/chosen": -217.26177978515625, "logps/rejected": -221.21568298339844, "loss": 0.4495, "rewards/accuracies": 0.75, "rewards/chosen": -0.08519965410232544, "rewards/margins": 1.5005558729171753, "rewards/rejected": -1.5857555866241455, "step": 562 }, { "epoch": 0.06, "learning_rate": 2.8482968512232237e-07, "logits/chosen": -2.9453463554382324, "logits/rejected": -3.049625873565674, "logps/chosen": -404.310546875, "logps/rejected": -277.104248046875, "loss": 0.453, "rewards/accuracies": 0.625, "rewards/chosen": 0.24798595905303955, "rewards/margins": 0.9206985235214233, "rewards/rejected": -0.672712504863739, "step": 563 }, { "epoch": 0.07, "learning_rate": 2.8479456865269807e-07, "logits/chosen": -3.2859647274017334, "logits/rejected": -3.3170647621154785, "logps/chosen": -228.65225219726562, "logps/rejected": -209.265869140625, "loss": 0.717, "rewards/accuracies": 0.375, "rewards/chosen": -0.22715182602405548, "rewards/margins": 0.8490610718727112, "rewards/rejected": -1.0762128829956055, "step": 564 }, { "epoch": 0.07, "learning_rate": 2.847594521830738e-07, "logits/chosen": -3.0620079040527344, "logits/rejected": -3.1941654682159424, "logps/chosen": -300.74688720703125, "logps/rejected": -348.821044921875, "loss": 0.5206, "rewards/accuracies": 0.75, "rewards/chosen": -0.31530195474624634, "rewards/margins": 0.5786973834037781, "rewards/rejected": -0.8939993381500244, "step": 565 }, { "epoch": 0.07, "learning_rate": 2.847243357134496e-07, "logits/chosen": -3.3365564346313477, "logits/rejected": -3.2249889373779297, "logps/chosen": -274.286865234375, "logps/rejected": -180.2883758544922, "loss": 0.7249, "rewards/accuracies": 0.25, "rewards/chosen": -0.3781934380531311, "rewards/margins": 0.1618926078081131, "rewards/rejected": -0.540086030960083, "step": 566 }, { "epoch": 0.07, "learning_rate": 2.8468921924382533e-07, "logits/chosen": -2.9601683616638184, "logits/rejected": -2.6350760459899902, "logps/chosen": -315.9713439941406, "logps/rejected": -250.956787109375, "loss": 0.5427, "rewards/accuracies": 0.625, "rewards/chosen": 0.17157602310180664, "rewards/margins": 0.7589461803436279, "rewards/rejected": -0.5873702168464661, "step": 567 }, { "epoch": 0.07, "learning_rate": 2.846541027742011e-07, "logits/chosen": -2.83595609664917, "logits/rejected": -2.7099967002868652, "logps/chosen": -259.77587890625, "logps/rejected": -176.6959228515625, "loss": 0.5304, "rewards/accuracies": 0.625, "rewards/chosen": 0.2183617204427719, "rewards/margins": 0.5292070508003235, "rewards/rejected": -0.3108453154563904, "step": 568 }, { "epoch": 0.07, "learning_rate": 2.8461898630457684e-07, "logits/chosen": -2.9993205070495605, "logits/rejected": -2.9990499019622803, "logps/chosen": -211.07571411132812, "logps/rejected": -230.1943359375, "loss": 0.4211, "rewards/accuracies": 0.875, "rewards/chosen": 0.26790815591812134, "rewards/margins": 0.7439912557601929, "rewards/rejected": -0.47608309984207153, "step": 569 }, { "epoch": 0.07, "learning_rate": 2.845838698349526e-07, "logits/chosen": -2.6326541900634766, "logits/rejected": -2.629570960998535, "logps/chosen": -215.5721435546875, "logps/rejected": -264.63726806640625, "loss": 0.4937, "rewards/accuracies": 0.875, "rewards/chosen": 0.17606694996356964, "rewards/margins": 0.7536322474479675, "rewards/rejected": -0.5775653123855591, "step": 570 }, { "epoch": 0.07, "learning_rate": 2.8454875336532835e-07, "logits/chosen": -2.565608501434326, "logits/rejected": -2.7435672283172607, "logps/chosen": -419.363037109375, "logps/rejected": -107.31320190429688, "loss": 1.0601, "rewards/accuracies": 0.25, "rewards/chosen": -0.7615485191345215, "rewards/margins": -0.4368448257446289, "rewards/rejected": -0.3247036933898926, "step": 571 }, { "epoch": 0.07, "learning_rate": 2.8451363689570405e-07, "logits/chosen": -3.5370476245880127, "logits/rejected": -3.39475679397583, "logps/chosen": -233.05548095703125, "logps/rejected": -258.10009765625, "loss": 0.7058, "rewards/accuracies": 0.75, "rewards/chosen": -0.44455212354660034, "rewards/margins": 0.426350474357605, "rewards/rejected": -0.8709025382995605, "step": 572 }, { "epoch": 0.07, "learning_rate": 2.844785204260798e-07, "logits/chosen": -3.0539088249206543, "logits/rejected": -3.003988742828369, "logps/chosen": -200.1059112548828, "logps/rejected": -204.16909790039062, "loss": 0.4893, "rewards/accuracies": 1.0, "rewards/chosen": 0.01456526666879654, "rewards/margins": 0.5757176280021667, "rewards/rejected": -0.5611523389816284, "step": 573 }, { "epoch": 0.07, "learning_rate": 2.8444340395645556e-07, "logits/chosen": -3.422623872756958, "logits/rejected": -3.2860288619995117, "logps/chosen": -540.5427856445312, "logps/rejected": -292.38372802734375, "loss": 0.2989, "rewards/accuracies": 1.0, "rewards/chosen": -0.024834223091602325, "rewards/margins": 1.3464725017547607, "rewards/rejected": -1.3713066577911377, "step": 574 }, { "epoch": 0.07, "learning_rate": 2.844082874868313e-07, "logits/chosen": -2.7679200172424316, "logits/rejected": -3.0395469665527344, "logps/chosen": -379.524169921875, "logps/rejected": -180.01467895507812, "loss": 0.6238, "rewards/accuracies": 0.625, "rewards/chosen": -0.31150147318840027, "rewards/margins": 0.53651362657547, "rewards/rejected": -0.8480151295661926, "step": 575 }, { "epoch": 0.07, "learning_rate": 2.8437317101720706e-07, "logits/chosen": -2.912106513977051, "logits/rejected": -3.076009511947632, "logps/chosen": -186.04254150390625, "logps/rejected": -201.83538818359375, "loss": 0.4337, "rewards/accuracies": 0.875, "rewards/chosen": -0.3043007254600525, "rewards/margins": 0.9681100845336914, "rewards/rejected": -1.2724108695983887, "step": 576 }, { "epoch": 0.07, "learning_rate": 2.843380545475828e-07, "logits/chosen": -2.464355230331421, "logits/rejected": -2.833991050720215, "logps/chosen": -392.5476379394531, "logps/rejected": -278.966552734375, "loss": 0.7362, "rewards/accuracies": 0.5, "rewards/chosen": -0.3340369462966919, "rewards/margins": 0.13450157642364502, "rewards/rejected": -0.4685384929180145, "step": 577 }, { "epoch": 0.07, "learning_rate": 2.843029380779585e-07, "logits/chosen": -3.2506563663482666, "logits/rejected": -3.0696945190429688, "logps/chosen": -190.8265380859375, "logps/rejected": -128.91940307617188, "loss": 0.6402, "rewards/accuracies": 0.5, "rewards/chosen": 0.027650974690914154, "rewards/margins": 0.15454694628715515, "rewards/rejected": -0.1268959790468216, "step": 578 }, { "epoch": 0.07, "learning_rate": 2.842678216083343e-07, "logits/chosen": -3.0331614017486572, "logits/rejected": -3.1520509719848633, "logps/chosen": -386.54913330078125, "logps/rejected": -261.4997863769531, "loss": 0.4125, "rewards/accuracies": 0.625, "rewards/chosen": -0.22909235954284668, "rewards/margins": 1.250894546508789, "rewards/rejected": -1.4799869060516357, "step": 579 }, { "epoch": 0.07, "learning_rate": 2.8423270513871e-07, "logits/chosen": -3.1338346004486084, "logits/rejected": -3.2878732681274414, "logps/chosen": -343.92596435546875, "logps/rejected": -157.74884033203125, "loss": 0.4756, "rewards/accuracies": 0.75, "rewards/chosen": 0.16051925718784332, "rewards/margins": 0.911997377872467, "rewards/rejected": -0.7514780759811401, "step": 580 }, { "epoch": 0.07, "learning_rate": 2.841975886690858e-07, "logits/chosen": -3.133943796157837, "logits/rejected": -3.0379106998443604, "logps/chosen": -131.99093627929688, "logps/rejected": -193.8929443359375, "loss": 0.6252, "rewards/accuracies": 0.75, "rewards/chosen": -0.1466299593448639, "rewards/margins": 0.45224130153656006, "rewards/rejected": -0.5988712906837463, "step": 581 }, { "epoch": 0.07, "learning_rate": 2.8416247219946153e-07, "logits/chosen": -3.051846742630005, "logits/rejected": -2.7714009284973145, "logps/chosen": -259.3820495605469, "logps/rejected": -202.49575805664062, "loss": 0.3842, "rewards/accuracies": 0.875, "rewards/chosen": 0.1096249595284462, "rewards/margins": 0.9627222418785095, "rewards/rejected": -0.8530972599983215, "step": 582 }, { "epoch": 0.07, "learning_rate": 2.841273557298373e-07, "logits/chosen": -2.690918445587158, "logits/rejected": -2.485649585723877, "logps/chosen": -263.5823974609375, "logps/rejected": -240.57923889160156, "loss": 0.6488, "rewards/accuracies": 0.5, "rewards/chosen": 0.1939014494419098, "rewards/margins": 0.19574642181396484, "rewards/rejected": -0.0018449574708938599, "step": 583 }, { "epoch": 0.07, "learning_rate": 2.8409223926021304e-07, "logits/chosen": -3.312138080596924, "logits/rejected": -2.97157621383667, "logps/chosen": -284.61541748046875, "logps/rejected": -185.38809204101562, "loss": 0.3772, "rewards/accuracies": 0.75, "rewards/chosen": 0.2834477722644806, "rewards/margins": 1.2873646020889282, "rewards/rejected": -1.0039167404174805, "step": 584 }, { "epoch": 0.07, "learning_rate": 2.840571227905888e-07, "logits/chosen": -2.740142345428467, "logits/rejected": -2.71858286857605, "logps/chosen": -219.63058471679688, "logps/rejected": -170.3091583251953, "loss": 0.5681, "rewards/accuracies": 0.625, "rewards/chosen": -0.05971723422408104, "rewards/margins": 0.7796342968940735, "rewards/rejected": -0.8393515348434448, "step": 585 }, { "epoch": 0.07, "learning_rate": 2.840220063209645e-07, "logits/chosen": -2.3721702098846436, "logits/rejected": -2.319884777069092, "logps/chosen": -239.2967529296875, "logps/rejected": -312.5836181640625, "loss": 0.5475, "rewards/accuracies": 0.5, "rewards/chosen": -0.05204177647829056, "rewards/margins": 0.545594334602356, "rewards/rejected": -0.5976361036300659, "step": 586 }, { "epoch": 0.07, "learning_rate": 2.8398688985134025e-07, "logits/chosen": -3.438483476638794, "logits/rejected": -3.3418476581573486, "logps/chosen": -288.30047607421875, "logps/rejected": -322.61114501953125, "loss": 0.3558, "rewards/accuracies": 1.0, "rewards/chosen": 0.05761921405792236, "rewards/margins": 1.2904915809631348, "rewards/rejected": -1.2328723669052124, "step": 587 }, { "epoch": 0.07, "learning_rate": 2.83951773381716e-07, "logits/chosen": -3.420330047607422, "logits/rejected": -3.147935390472412, "logps/chosen": -244.6639404296875, "logps/rejected": -192.90037536621094, "loss": 0.6899, "rewards/accuracies": 0.5, "rewards/chosen": -0.43344008922576904, "rewards/margins": 0.7481533288955688, "rewards/rejected": -1.181593418121338, "step": 588 }, { "epoch": 0.07, "learning_rate": 2.8391665691209176e-07, "logits/chosen": -3.1110801696777344, "logits/rejected": -2.820272922515869, "logps/chosen": -364.3965759277344, "logps/rejected": -333.96142578125, "loss": 0.458, "rewards/accuracies": 0.75, "rewards/chosen": -0.2278439998626709, "rewards/margins": 1.2228749990463257, "rewards/rejected": -1.4507191181182861, "step": 589 }, { "epoch": 0.07, "learning_rate": 2.838815404424675e-07, "logits/chosen": -2.5807666778564453, "logits/rejected": -2.401181697845459, "logps/chosen": -222.55657958984375, "logps/rejected": -183.37710571289062, "loss": 0.5474, "rewards/accuracies": 0.875, "rewards/chosen": -0.07202743738889694, "rewards/margins": 0.4343627691268921, "rewards/rejected": -0.5063902139663696, "step": 590 }, { "epoch": 0.07, "learning_rate": 2.8384642397284327e-07, "logits/chosen": -3.7373287677764893, "logits/rejected": -3.380686044692993, "logps/chosen": -368.79302978515625, "logps/rejected": -349.9984130859375, "loss": 0.5687, "rewards/accuracies": 0.875, "rewards/chosen": 0.07392235100269318, "rewards/margins": 0.5646569728851318, "rewards/rejected": -0.49073460698127747, "step": 591 }, { "epoch": 0.07, "learning_rate": 2.83811307503219e-07, "logits/chosen": -3.299287796020508, "logits/rejected": -2.932203531265259, "logps/chosen": -288.1181945800781, "logps/rejected": -293.4860534667969, "loss": 0.3414, "rewards/accuracies": 0.75, "rewards/chosen": 0.1486733853816986, "rewards/margins": 1.4948911666870117, "rewards/rejected": -1.3462179899215698, "step": 592 }, { "epoch": 0.07, "learning_rate": 2.8377619103359477e-07, "logits/chosen": -3.1409854888916016, "logits/rejected": -3.1719956398010254, "logps/chosen": -367.2176208496094, "logps/rejected": -224.2695770263672, "loss": 0.4465, "rewards/accuracies": 0.75, "rewards/chosen": 0.2023565024137497, "rewards/margins": 0.7969527840614319, "rewards/rejected": -0.5945962071418762, "step": 593 }, { "epoch": 0.07, "learning_rate": 2.837410745639705e-07, "logits/chosen": -3.666069746017456, "logits/rejected": -3.3207573890686035, "logps/chosen": -376.62158203125, "logps/rejected": -162.41262817382812, "loss": 0.7542, "rewards/accuracies": 0.5, "rewards/chosen": -0.4511701166629791, "rewards/margins": 0.00042928755283355713, "rewards/rejected": -0.4515994191169739, "step": 594 }, { "epoch": 0.07, "learning_rate": 2.8370595809434623e-07, "logits/chosen": -3.1518170833587646, "logits/rejected": -3.2193751335144043, "logps/chosen": -229.4091339111328, "logps/rejected": -188.7713623046875, "loss": 0.4416, "rewards/accuracies": 0.625, "rewards/chosen": -0.16101792454719543, "rewards/margins": 1.0197718143463135, "rewards/rejected": -1.1807897090911865, "step": 595 }, { "epoch": 0.07, "learning_rate": 2.83670841624722e-07, "logits/chosen": -3.2547640800476074, "logits/rejected": -3.5978660583496094, "logps/chosen": -145.9723663330078, "logps/rejected": -346.95587158203125, "loss": 0.2408, "rewards/accuracies": 0.875, "rewards/chosen": 0.11018386483192444, "rewards/margins": 1.9690461158752441, "rewards/rejected": -1.8588621616363525, "step": 596 }, { "epoch": 0.07, "learning_rate": 2.8363572515509774e-07, "logits/chosen": -3.633589506149292, "logits/rejected": -3.2893996238708496, "logps/chosen": -191.10833740234375, "logps/rejected": -169.20172119140625, "loss": 0.6649, "rewards/accuracies": 0.5, "rewards/chosen": -0.039100948721170425, "rewards/margins": 0.3054118752479553, "rewards/rejected": -0.34451279044151306, "step": 597 }, { "epoch": 0.07, "learning_rate": 2.836006086854735e-07, "logits/chosen": -3.276345729827881, "logits/rejected": -3.1193535327911377, "logps/chosen": -238.8255157470703, "logps/rejected": -265.792724609375, "loss": 0.2981, "rewards/accuracies": 0.875, "rewards/chosen": 0.039712145924568176, "rewards/margins": 1.7417290210723877, "rewards/rejected": -1.702016830444336, "step": 598 }, { "epoch": 0.07, "learning_rate": 2.835654922158492e-07, "logits/chosen": -3.36613130569458, "logits/rejected": -3.3966870307922363, "logps/chosen": -248.4036865234375, "logps/rejected": -258.8201904296875, "loss": 0.3471, "rewards/accuracies": 0.875, "rewards/chosen": 0.05074310302734375, "rewards/margins": 1.2298450469970703, "rewards/rejected": -1.1791019439697266, "step": 599 }, { "epoch": 0.07, "learning_rate": 2.8353037574622494e-07, "logits/chosen": -2.8329219818115234, "logits/rejected": -2.881606101989746, "logps/chosen": -315.7949523925781, "logps/rejected": -189.22109985351562, "loss": 0.5293, "rewards/accuracies": 0.625, "rewards/chosen": -0.09626045823097229, "rewards/margins": 0.9568472504615784, "rewards/rejected": -1.053107738494873, "step": 600 }, { "epoch": 0.07, "learning_rate": 2.8349525927660075e-07, "logits/chosen": -3.479264259338379, "logits/rejected": -3.2550079822540283, "logps/chosen": -371.71820068359375, "logps/rejected": -378.53009033203125, "loss": 0.571, "rewards/accuracies": 0.75, "rewards/chosen": -0.048086121678352356, "rewards/margins": 0.7984611392021179, "rewards/rejected": -0.8465472459793091, "step": 601 }, { "epoch": 0.07, "learning_rate": 2.8346014280697645e-07, "logits/chosen": -2.9838826656341553, "logits/rejected": -2.9615654945373535, "logps/chosen": -151.2657012939453, "logps/rejected": -101.64781188964844, "loss": 0.6159, "rewards/accuracies": 0.625, "rewards/chosen": 0.1190408393740654, "rewards/margins": 0.45836377143859863, "rewards/rejected": -0.33932292461395264, "step": 602 }, { "epoch": 0.07, "learning_rate": 2.834250263373522e-07, "logits/chosen": -3.8918468952178955, "logits/rejected": -3.9071686267852783, "logps/chosen": -215.66847229003906, "logps/rejected": -262.21661376953125, "loss": 0.4703, "rewards/accuracies": 0.5, "rewards/chosen": -0.12405318021774292, "rewards/margins": 1.2712405920028687, "rewards/rejected": -1.3952938318252563, "step": 603 }, { "epoch": 0.07, "learning_rate": 2.8338990986772796e-07, "logits/chosen": -3.230825185775757, "logits/rejected": -3.391624689102173, "logps/chosen": -367.1190185546875, "logps/rejected": -344.9413757324219, "loss": 0.9151, "rewards/accuracies": 0.5, "rewards/chosen": -0.3344690501689911, "rewards/margins": 0.07447004318237305, "rewards/rejected": -0.40893906354904175, "step": 604 }, { "epoch": 0.07, "learning_rate": 2.833547933981037e-07, "logits/chosen": -3.498638153076172, "logits/rejected": -3.279733180999756, "logps/chosen": -309.2095031738281, "logps/rejected": -318.4519958496094, "loss": 0.3328, "rewards/accuracies": 0.75, "rewards/chosen": 0.18286341428756714, "rewards/margins": 1.473036766052246, "rewards/rejected": -1.2901732921600342, "step": 605 }, { "epoch": 0.07, "learning_rate": 2.8331967692847947e-07, "logits/chosen": -3.579416275024414, "logits/rejected": -3.324866533279419, "logps/chosen": -226.84814453125, "logps/rejected": -169.96875, "loss": 0.5737, "rewards/accuracies": 0.625, "rewards/chosen": -0.5237652063369751, "rewards/margins": 0.8228992223739624, "rewards/rejected": -1.3466644287109375, "step": 606 }, { "epoch": 0.07, "learning_rate": 2.8328456045885517e-07, "logits/chosen": -3.7990610599517822, "logits/rejected": -3.493673324584961, "logps/chosen": -190.21429443359375, "logps/rejected": -192.6749725341797, "loss": 0.3771, "rewards/accuracies": 0.875, "rewards/chosen": 0.11960812658071518, "rewards/margins": 1.3039953708648682, "rewards/rejected": -1.1843873262405396, "step": 607 }, { "epoch": 0.07, "learning_rate": 2.832494439892309e-07, "logits/chosen": -2.839855670928955, "logits/rejected": -2.872342109680176, "logps/chosen": -369.30352783203125, "logps/rejected": -309.098876953125, "loss": 0.4578, "rewards/accuracies": 0.75, "rewards/chosen": 0.18495683372020721, "rewards/margins": 1.3200418949127197, "rewards/rejected": -1.135084867477417, "step": 608 }, { "epoch": 0.07, "learning_rate": 2.832143275196067e-07, "logits/chosen": -3.3907508850097656, "logits/rejected": -3.0417046546936035, "logps/chosen": -287.4632568359375, "logps/rejected": -185.15408325195312, "loss": 1.2906, "rewards/accuracies": 0.375, "rewards/chosen": -1.335735559463501, "rewards/margins": -0.5624992847442627, "rewards/rejected": -0.7732362747192383, "step": 609 }, { "epoch": 0.07, "learning_rate": 2.8317921104998243e-07, "logits/chosen": -3.7138254642486572, "logits/rejected": -3.705875873565674, "logps/chosen": -302.68310546875, "logps/rejected": -229.69387817382812, "loss": 0.5331, "rewards/accuracies": 0.75, "rewards/chosen": -0.14651048183441162, "rewards/margins": 0.7846730351448059, "rewards/rejected": -0.9311835169792175, "step": 610 }, { "epoch": 0.07, "learning_rate": 2.831440945803582e-07, "logits/chosen": -3.3624393939971924, "logits/rejected": -3.0908660888671875, "logps/chosen": -124.87248992919922, "logps/rejected": -100.67927551269531, "loss": 0.4742, "rewards/accuracies": 0.75, "rewards/chosen": -0.02834758162498474, "rewards/margins": 0.6390489935874939, "rewards/rejected": -0.667396605014801, "step": 611 }, { "epoch": 0.07, "learning_rate": 2.831089781107339e-07, "logits/chosen": -3.372060775756836, "logits/rejected": -3.1947011947631836, "logps/chosen": -157.3802490234375, "logps/rejected": -199.58578491210938, "loss": 0.4947, "rewards/accuracies": 0.875, "rewards/chosen": -0.03317014127969742, "rewards/margins": 0.7201344966888428, "rewards/rejected": -0.7533047199249268, "step": 612 }, { "epoch": 0.07, "learning_rate": 2.830738616411097e-07, "logits/chosen": -3.2238903045654297, "logits/rejected": -3.1187775135040283, "logps/chosen": -125.33981323242188, "logps/rejected": -163.28749084472656, "loss": 0.3158, "rewards/accuracies": 1.0, "rewards/chosen": 0.3590514659881592, "rewards/margins": 1.5268975496292114, "rewards/rejected": -1.1678460836410522, "step": 613 }, { "epoch": 0.07, "learning_rate": 2.8303874517148545e-07, "logits/chosen": -2.984804391860962, "logits/rejected": -3.002309799194336, "logps/chosen": -315.9145812988281, "logps/rejected": -281.891357421875, "loss": 0.6053, "rewards/accuracies": 0.625, "rewards/chosen": 0.1040472462773323, "rewards/margins": 0.6271089911460876, "rewards/rejected": -0.5230617523193359, "step": 614 }, { "epoch": 0.07, "learning_rate": 2.8300362870186115e-07, "logits/chosen": -2.814328193664551, "logits/rejected": -2.8751745223999023, "logps/chosen": -219.7996826171875, "logps/rejected": -293.1571044921875, "loss": 0.4039, "rewards/accuracies": 0.75, "rewards/chosen": 0.32106101512908936, "rewards/margins": 1.3266940116882324, "rewards/rejected": -1.0056328773498535, "step": 615 }, { "epoch": 0.07, "learning_rate": 2.829685122322369e-07, "logits/chosen": -2.8527355194091797, "logits/rejected": -2.989307403564453, "logps/chosen": -219.4447784423828, "logps/rejected": -288.9472961425781, "loss": 0.6222, "rewards/accuracies": 0.375, "rewards/chosen": 0.05713772773742676, "rewards/margins": 0.668530285358429, "rewards/rejected": -0.6113924980163574, "step": 616 }, { "epoch": 0.07, "learning_rate": 2.8293339576261265e-07, "logits/chosen": -3.837667226791382, "logits/rejected": -3.9089627265930176, "logps/chosen": -198.53402709960938, "logps/rejected": -203.71905517578125, "loss": 0.5995, "rewards/accuracies": 0.5, "rewards/chosen": -0.029162194579839706, "rewards/margins": 0.354979544878006, "rewards/rejected": -0.3841417729854584, "step": 617 }, { "epoch": 0.07, "learning_rate": 2.828982792929884e-07, "logits/chosen": -2.9510388374328613, "logits/rejected": -2.6519482135772705, "logps/chosen": -152.76417541503906, "logps/rejected": -176.57406616210938, "loss": 0.4328, "rewards/accuracies": 0.875, "rewards/chosen": -0.011390842497348785, "rewards/margins": 0.7977659702301025, "rewards/rejected": -0.8091567754745483, "step": 618 }, { "epoch": 0.07, "learning_rate": 2.8286316282336416e-07, "logits/chosen": -3.963745594024658, "logits/rejected": -3.7298078536987305, "logps/chosen": -257.343994140625, "logps/rejected": -202.93719482421875, "loss": 0.4738, "rewards/accuracies": 0.75, "rewards/chosen": 0.036448799073696136, "rewards/margins": 0.8925577402114868, "rewards/rejected": -0.8561089634895325, "step": 619 }, { "epoch": 0.07, "learning_rate": 2.8282804635373986e-07, "logits/chosen": -3.560281753540039, "logits/rejected": -3.3643643856048584, "logps/chosen": -381.47979736328125, "logps/rejected": -372.06658935546875, "loss": 0.4669, "rewards/accuracies": 0.75, "rewards/chosen": -0.27751314640045166, "rewards/margins": 0.9599399566650391, "rewards/rejected": -1.2374531030654907, "step": 620 }, { "epoch": 0.07, "learning_rate": 2.827929298841156e-07, "logits/chosen": -3.530916213989258, "logits/rejected": -3.4918291568756104, "logps/chosen": -273.8067932128906, "logps/rejected": -223.84823608398438, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": -0.06040230020880699, "rewards/margins": 0.6505823731422424, "rewards/rejected": -0.7109846472740173, "step": 621 }, { "epoch": 0.07, "learning_rate": 2.827578134144914e-07, "logits/chosen": -2.9849705696105957, "logits/rejected": -3.0985097885131836, "logps/chosen": -335.08917236328125, "logps/rejected": -304.03369140625, "loss": 0.448, "rewards/accuracies": 0.75, "rewards/chosen": 0.11139983683824539, "rewards/margins": 1.1816134452819824, "rewards/rejected": -1.0702136754989624, "step": 622 }, { "epoch": 0.07, "learning_rate": 2.827226969448671e-07, "logits/chosen": -2.9607491493225098, "logits/rejected": -3.1391518115997314, "logps/chosen": -202.5513458251953, "logps/rejected": -165.22268676757812, "loss": 0.519, "rewards/accuracies": 0.625, "rewards/chosen": -0.077151358127594, "rewards/margins": 0.5873945951461792, "rewards/rejected": -0.664546012878418, "step": 623 }, { "epoch": 0.07, "learning_rate": 2.826875804752429e-07, "logits/chosen": -2.4530487060546875, "logits/rejected": -2.3164474964141846, "logps/chosen": -210.24658203125, "logps/rejected": -255.90866088867188, "loss": 0.4222, "rewards/accuracies": 0.875, "rewards/chosen": 0.013784952461719513, "rewards/margins": 0.8635143637657166, "rewards/rejected": -0.8497294187545776, "step": 624 }, { "epoch": 0.07, "learning_rate": 2.8265246400561863e-07, "logits/chosen": -3.2347888946533203, "logits/rejected": -3.2915313243865967, "logps/chosen": -130.29518127441406, "logps/rejected": -199.05657958984375, "loss": 0.4501, "rewards/accuracies": 0.75, "rewards/chosen": -0.37697330117225647, "rewards/margins": 0.9773353934288025, "rewards/rejected": -1.3543087244033813, "step": 625 }, { "epoch": 0.07, "learning_rate": 2.826173475359944e-07, "logits/chosen": -3.669151782989502, "logits/rejected": -3.390648365020752, "logps/chosen": -206.706787109375, "logps/rejected": -159.48300170898438, "loss": 0.764, "rewards/accuracies": 0.625, "rewards/chosen": -0.4353793263435364, "rewards/margins": 0.03055882453918457, "rewards/rejected": -0.46593815088272095, "step": 626 }, { "epoch": 0.07, "learning_rate": 2.8258223106637014e-07, "logits/chosen": -2.950985908508301, "logits/rejected": -3.2271039485931396, "logps/chosen": -421.0364990234375, "logps/rejected": -423.6495361328125, "loss": 0.5084, "rewards/accuracies": 0.5, "rewards/chosen": 0.11888724565505981, "rewards/margins": 1.116855263710022, "rewards/rejected": -0.9979678988456726, "step": 627 }, { "epoch": 0.07, "learning_rate": 2.8254711459674584e-07, "logits/chosen": -2.693455219268799, "logits/rejected": -2.5099165439605713, "logps/chosen": -296.0506286621094, "logps/rejected": -217.6190185546875, "loss": 0.6226, "rewards/accuracies": 0.5, "rewards/chosen": -0.07870311290025711, "rewards/margins": 0.5490449666976929, "rewards/rejected": -0.6277481913566589, "step": 628 }, { "epoch": 0.07, "learning_rate": 2.825119981271216e-07, "logits/chosen": -2.988558292388916, "logits/rejected": -2.8650808334350586, "logps/chosen": -402.98614501953125, "logps/rejected": -315.8740539550781, "loss": 0.4938, "rewards/accuracies": 0.75, "rewards/chosen": 0.0207187682390213, "rewards/margins": 1.0009024143218994, "rewards/rejected": -0.9801836609840393, "step": 629 }, { "epoch": 0.07, "learning_rate": 2.8247688165749735e-07, "logits/chosen": -2.465223789215088, "logits/rejected": -2.769787073135376, "logps/chosen": -220.0747833251953, "logps/rejected": -228.54983520507812, "loss": 0.4297, "rewards/accuracies": 0.875, "rewards/chosen": 0.0826532319188118, "rewards/margins": 0.7393407225608826, "rewards/rejected": -0.6566874980926514, "step": 630 }, { "epoch": 0.07, "learning_rate": 2.824417651878731e-07, "logits/chosen": -2.6062750816345215, "logits/rejected": -2.872267723083496, "logps/chosen": -197.28997802734375, "logps/rejected": -284.3924560546875, "loss": 0.629, "rewards/accuracies": 0.625, "rewards/chosen": -0.33210378885269165, "rewards/margins": 1.0762338638305664, "rewards/rejected": -1.4083375930786133, "step": 631 }, { "epoch": 0.07, "learning_rate": 2.8240664871824886e-07, "logits/chosen": -2.7323367595672607, "logits/rejected": -2.745156764984131, "logps/chosen": -308.6730041503906, "logps/rejected": -396.23077392578125, "loss": 0.3815, "rewards/accuracies": 0.75, "rewards/chosen": -0.26536500453948975, "rewards/margins": 0.9743742346763611, "rewards/rejected": -1.239739179611206, "step": 632 }, { "epoch": 0.07, "learning_rate": 2.8237153224862456e-07, "logits/chosen": -3.504061698913574, "logits/rejected": -3.7011914253234863, "logps/chosen": -232.31907653808594, "logps/rejected": -202.03196716308594, "loss": 0.3384, "rewards/accuracies": 0.875, "rewards/chosen": 0.3494293987751007, "rewards/margins": 1.3804587125778198, "rewards/rejected": -1.031029224395752, "step": 633 }, { "epoch": 0.07, "learning_rate": 2.823364157790003e-07, "logits/chosen": -2.966700315475464, "logits/rejected": -2.904381513595581, "logps/chosen": -491.19134521484375, "logps/rejected": -278.7889709472656, "loss": 0.3915, "rewards/accuracies": 0.875, "rewards/chosen": 0.1501804143190384, "rewards/margins": 0.8382871150970459, "rewards/rejected": -0.6881066560745239, "step": 634 }, { "epoch": 0.07, "learning_rate": 2.823012993093761e-07, "logits/chosen": -2.5274901390075684, "logits/rejected": -3.0725626945495605, "logps/chosen": -445.3673095703125, "logps/rejected": -254.85923767089844, "loss": 0.188, "rewards/accuracies": 1.0, "rewards/chosen": 0.6510478258132935, "rewards/margins": 2.1387946605682373, "rewards/rejected": -1.4877469539642334, "step": 635 }, { "epoch": 0.07, "learning_rate": 2.822661828397518e-07, "logits/chosen": -3.3121562004089355, "logits/rejected": -3.4127559661865234, "logps/chosen": -389.0754699707031, "logps/rejected": -291.8110046386719, "loss": 0.4447, "rewards/accuracies": 0.75, "rewards/chosen": 0.12660431861877441, "rewards/margins": 0.7087717652320862, "rewards/rejected": -0.5821675658226013, "step": 636 }, { "epoch": 0.07, "learning_rate": 2.8223106637012757e-07, "logits/chosen": -2.9307665824890137, "logits/rejected": -2.8229918479919434, "logps/chosen": -303.55169677734375, "logps/rejected": -310.1448059082031, "loss": 0.5319, "rewards/accuracies": 0.75, "rewards/chosen": -0.061731815338134766, "rewards/margins": 0.9247167110443115, "rewards/rejected": -0.9864485263824463, "step": 637 }, { "epoch": 0.07, "learning_rate": 2.821959499005033e-07, "logits/chosen": -3.9062395095825195, "logits/rejected": -3.432831048965454, "logps/chosen": -339.75433349609375, "logps/rejected": -193.55880737304688, "loss": 0.6059, "rewards/accuracies": 0.625, "rewards/chosen": -0.23130403459072113, "rewards/margins": 0.37777069211006165, "rewards/rejected": -0.6090747714042664, "step": 638 }, { "epoch": 0.07, "learning_rate": 2.821608334308791e-07, "logits/chosen": -2.6190154552459717, "logits/rejected": -2.702080249786377, "logps/chosen": -516.25244140625, "logps/rejected": -289.9027099609375, "loss": 0.5306, "rewards/accuracies": 0.875, "rewards/chosen": 0.21195468306541443, "rewards/margins": 1.0171723365783691, "rewards/rejected": -0.8052175045013428, "step": 639 }, { "epoch": 0.07, "learning_rate": 2.8212571696125483e-07, "logits/chosen": -2.679837465286255, "logits/rejected": -2.923995018005371, "logps/chosen": -197.61209106445312, "logps/rejected": -200.4513702392578, "loss": 0.6621, "rewards/accuracies": 0.5, "rewards/chosen": -0.3475823402404785, "rewards/margins": 0.41853657364845276, "rewards/rejected": -0.7661188840866089, "step": 640 }, { "epoch": 0.07, "learning_rate": 2.8209060049163053e-07, "logits/chosen": -3.0901055335998535, "logits/rejected": -3.15689754486084, "logps/chosen": -314.61004638671875, "logps/rejected": -203.03675842285156, "loss": 0.4572, "rewards/accuracies": 1.0, "rewards/chosen": 0.25867682695388794, "rewards/margins": 0.7284399271011353, "rewards/rejected": -0.4697631001472473, "step": 641 }, { "epoch": 0.07, "learning_rate": 2.820554840220063e-07, "logits/chosen": -3.70808744430542, "logits/rejected": -3.533933162689209, "logps/chosen": -228.56832885742188, "logps/rejected": -157.4409637451172, "loss": 0.9322, "rewards/accuracies": 0.375, "rewards/chosen": -0.810427188873291, "rewards/margins": -0.3191819190979004, "rewards/rejected": -0.49124518036842346, "step": 642 }, { "epoch": 0.07, "learning_rate": 2.8202036755238204e-07, "logits/chosen": -3.4402666091918945, "logits/rejected": -3.608651876449585, "logps/chosen": -167.5726776123047, "logps/rejected": -189.20660400390625, "loss": 0.4443, "rewards/accuracies": 0.75, "rewards/chosen": -0.0929604098200798, "rewards/margins": 1.208081841468811, "rewards/rejected": -1.3010423183441162, "step": 643 }, { "epoch": 0.07, "learning_rate": 2.819852510827578e-07, "logits/chosen": -2.3759982585906982, "logits/rejected": -2.7116000652313232, "logps/chosen": -219.03952026367188, "logps/rejected": -194.26783752441406, "loss": 0.5063, "rewards/accuracies": 0.625, "rewards/chosen": -0.02527442015707493, "rewards/margins": 0.9281631112098694, "rewards/rejected": -0.9534375667572021, "step": 644 }, { "epoch": 0.07, "learning_rate": 2.8195013461313355e-07, "logits/chosen": -2.429760456085205, "logits/rejected": -2.3235838413238525, "logps/chosen": -186.65560913085938, "logps/rejected": -248.0267333984375, "loss": 0.7367, "rewards/accuracies": 0.5, "rewards/chosen": -0.5467196702957153, "rewards/margins": 0.002445850521326065, "rewards/rejected": -0.5491654872894287, "step": 645 }, { "epoch": 0.07, "learning_rate": 2.819150181435093e-07, "logits/chosen": -3.0598084926605225, "logits/rejected": -3.109884262084961, "logps/chosen": -284.8348083496094, "logps/rejected": -388.57171630859375, "loss": 0.2034, "rewards/accuracies": 1.0, "rewards/chosen": 0.6189019083976746, "rewards/margins": 1.8634790182113647, "rewards/rejected": -1.244577169418335, "step": 646 }, { "epoch": 0.07, "learning_rate": 2.8187990167388506e-07, "logits/chosen": -3.4173641204833984, "logits/rejected": -3.3686740398406982, "logps/chosen": -203.47494506835938, "logps/rejected": -172.414794921875, "loss": 0.4649, "rewards/accuracies": 0.625, "rewards/chosen": 0.2973012328147888, "rewards/margins": 0.8682623505592346, "rewards/rejected": -0.5709611177444458, "step": 647 }, { "epoch": 0.07, "learning_rate": 2.818447852042608e-07, "logits/chosen": -2.737396717071533, "logits/rejected": -2.8556737899780273, "logps/chosen": -368.24993896484375, "logps/rejected": -150.498046875, "loss": 0.4569, "rewards/accuracies": 0.875, "rewards/chosen": 0.15030935406684875, "rewards/margins": 0.7441118955612183, "rewards/rejected": -0.5938024520874023, "step": 648 }, { "epoch": 0.07, "learning_rate": 2.818096687346365e-07, "logits/chosen": -3.612534761428833, "logits/rejected": -3.7730445861816406, "logps/chosen": -260.3518371582031, "logps/rejected": -237.2313690185547, "loss": 0.3097, "rewards/accuracies": 1.0, "rewards/chosen": -0.10116110742092133, "rewards/margins": 1.478062391281128, "rewards/rejected": -1.5792236328125, "step": 649 }, { "epoch": 0.07, "learning_rate": 2.8177455226501227e-07, "logits/chosen": -3.8730850219726562, "logits/rejected": -3.579479932785034, "logps/chosen": -329.912841796875, "logps/rejected": -218.162353515625, "loss": 0.456, "rewards/accuracies": 0.875, "rewards/chosen": 0.09039077162742615, "rewards/margins": 0.7901965379714966, "rewards/rejected": -0.699805736541748, "step": 650 }, { "epoch": 0.08, "learning_rate": 2.81739435795388e-07, "logits/chosen": -2.870739459991455, "logits/rejected": -2.986448287963867, "logps/chosen": -253.240966796875, "logps/rejected": -235.52874755859375, "loss": 0.3736, "rewards/accuracies": 0.875, "rewards/chosen": 0.3080122470855713, "rewards/margins": 1.2318308353424072, "rewards/rejected": -0.9238184690475464, "step": 651 }, { "epoch": 0.08, "learning_rate": 2.817043193257638e-07, "logits/chosen": -3.2262566089630127, "logits/rejected": -3.2233049869537354, "logps/chosen": -226.5541229248047, "logps/rejected": -254.38339233398438, "loss": 0.499, "rewards/accuracies": 0.625, "rewards/chosen": -0.06557979434728622, "rewards/margins": 0.7249903082847595, "rewards/rejected": -0.790570080280304, "step": 652 }, { "epoch": 0.08, "learning_rate": 2.8166920285613953e-07, "logits/chosen": -3.3718514442443848, "logits/rejected": -3.6217567920684814, "logps/chosen": -219.1671142578125, "logps/rejected": -269.7743835449219, "loss": 0.5121, "rewards/accuracies": 0.5, "rewards/chosen": 0.07980260252952576, "rewards/margins": 1.1106517314910889, "rewards/rejected": -1.0308490991592407, "step": 653 }, { "epoch": 0.08, "learning_rate": 2.816340863865153e-07, "logits/chosen": -3.5437674522399902, "logits/rejected": -3.562863826751709, "logps/chosen": -383.1075134277344, "logps/rejected": -340.1318359375, "loss": 0.1843, "rewards/accuracies": 1.0, "rewards/chosen": 0.23954106867313385, "rewards/margins": 1.8402096033096313, "rewards/rejected": -1.6006686687469482, "step": 654 }, { "epoch": 0.08, "learning_rate": 2.81598969916891e-07, "logits/chosen": -2.182158946990967, "logits/rejected": -2.341937303543091, "logps/chosen": -448.85687255859375, "logps/rejected": -303.45928955078125, "loss": 0.3674, "rewards/accuracies": 1.0, "rewards/chosen": 0.2605876624584198, "rewards/margins": 0.9077305197715759, "rewards/rejected": -0.6471428871154785, "step": 655 }, { "epoch": 0.08, "learning_rate": 2.815638534472668e-07, "logits/chosen": -2.663252115249634, "logits/rejected": -2.679100275039673, "logps/chosen": -250.4080810546875, "logps/rejected": -328.595458984375, "loss": 0.513, "rewards/accuracies": 0.875, "rewards/chosen": -0.06937167793512344, "rewards/margins": 0.8674710988998413, "rewards/rejected": -0.9368427395820618, "step": 656 }, { "epoch": 0.08, "learning_rate": 2.815287369776425e-07, "logits/chosen": -2.8067548274993896, "logits/rejected": -2.9203338623046875, "logps/chosen": -241.26138305664062, "logps/rejected": -186.9059295654297, "loss": 0.4242, "rewards/accuracies": 1.0, "rewards/chosen": 0.1027831956744194, "rewards/margins": 0.7267370223999023, "rewards/rejected": -0.6239538192749023, "step": 657 }, { "epoch": 0.08, "learning_rate": 2.8149362050801824e-07, "logits/chosen": -3.4372267723083496, "logits/rejected": -3.178781270980835, "logps/chosen": -376.176513671875, "logps/rejected": -221.98355102539062, "loss": 0.4268, "rewards/accuracies": 0.875, "rewards/chosen": -0.33262619376182556, "rewards/margins": 0.9065229892730713, "rewards/rejected": -1.2391493320465088, "step": 658 }, { "epoch": 0.08, "learning_rate": 2.81458504038394e-07, "logits/chosen": -3.217949390411377, "logits/rejected": -3.089524269104004, "logps/chosen": -228.12503051757812, "logps/rejected": -243.04562377929688, "loss": 0.5367, "rewards/accuracies": 0.875, "rewards/chosen": -0.37924641370773315, "rewards/margins": 0.4592605233192444, "rewards/rejected": -0.8385068774223328, "step": 659 }, { "epoch": 0.08, "learning_rate": 2.8142338756876975e-07, "logits/chosen": -2.900501251220703, "logits/rejected": -2.7165184020996094, "logps/chosen": -248.76388549804688, "logps/rejected": -207.01409912109375, "loss": 0.3649, "rewards/accuracies": 0.875, "rewards/chosen": 0.15769578516483307, "rewards/margins": 0.8768856525421143, "rewards/rejected": -0.7191898822784424, "step": 660 }, { "epoch": 0.08, "learning_rate": 2.813882710991455e-07, "logits/chosen": -3.957078456878662, "logits/rejected": -3.8362674713134766, "logps/chosen": -145.32540893554688, "logps/rejected": -136.26345825195312, "loss": 0.3963, "rewards/accuracies": 0.875, "rewards/chosen": 0.011360090225934982, "rewards/margins": 1.0556401014328003, "rewards/rejected": -1.044279932975769, "step": 661 }, { "epoch": 0.08, "learning_rate": 2.813531546295212e-07, "logits/chosen": -2.8501622676849365, "logits/rejected": -3.0820937156677246, "logps/chosen": -456.33319091796875, "logps/rejected": -192.61392211914062, "loss": 0.3305, "rewards/accuracies": 0.875, "rewards/chosen": 0.24530985951423645, "rewards/margins": 1.1563799381256104, "rewards/rejected": -0.9110701084136963, "step": 662 }, { "epoch": 0.08, "learning_rate": 2.8131803815989696e-07, "logits/chosen": -3.8404223918914795, "logits/rejected": -3.631300449371338, "logps/chosen": -197.45916748046875, "logps/rejected": -113.88496398925781, "loss": 0.6455, "rewards/accuracies": 0.625, "rewards/chosen": -0.3517687916755676, "rewards/margins": 0.25402748584747314, "rewards/rejected": -0.6057963371276855, "step": 663 }, { "epoch": 0.08, "learning_rate": 2.812829216902727e-07, "logits/chosen": -3.119865655899048, "logits/rejected": -3.1801528930664062, "logps/chosen": -287.74066162109375, "logps/rejected": -112.96851348876953, "loss": 0.5897, "rewards/accuracies": 0.75, "rewards/chosen": -0.12497826665639877, "rewards/margins": 0.33184346556663513, "rewards/rejected": -0.4568217396736145, "step": 664 }, { "epoch": 0.08, "learning_rate": 2.8124780522064847e-07, "logits/chosen": -3.4885144233703613, "logits/rejected": -3.3222920894622803, "logps/chosen": -86.46119689941406, "logps/rejected": -141.62918090820312, "loss": 0.4183, "rewards/accuracies": 0.875, "rewards/chosen": 0.24856294691562653, "rewards/margins": 0.7436630129814148, "rewards/rejected": -0.49510008096694946, "step": 665 }, { "epoch": 0.08, "learning_rate": 2.812126887510242e-07, "logits/chosen": -3.740874767303467, "logits/rejected": -3.7094991207122803, "logps/chosen": -178.31536865234375, "logps/rejected": -139.36520385742188, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.16647811233997345, "rewards/margins": 0.2598204016685486, "rewards/rejected": -0.4262985289096832, "step": 666 }, { "epoch": 0.08, "learning_rate": 2.811775722814e-07, "logits/chosen": -3.1754376888275146, "logits/rejected": -2.932903289794922, "logps/chosen": -263.0095520019531, "logps/rejected": -227.65838623046875, "loss": 0.6239, "rewards/accuracies": 0.5, "rewards/chosen": -0.47631216049194336, "rewards/margins": 0.9844347238540649, "rewards/rejected": -1.4607468843460083, "step": 667 }, { "epoch": 0.08, "learning_rate": 2.811424558117757e-07, "logits/chosen": -3.1097710132598877, "logits/rejected": -2.9225053787231445, "logps/chosen": -532.6370239257812, "logps/rejected": -284.1167907714844, "loss": 0.6641, "rewards/accuracies": 0.625, "rewards/chosen": 0.1953025609254837, "rewards/margins": 0.24375471472740173, "rewards/rejected": -0.04845218360424042, "step": 668 }, { "epoch": 0.08, "learning_rate": 2.811073393421515e-07, "logits/chosen": -3.4223151206970215, "logits/rejected": -3.2450737953186035, "logps/chosen": -362.22723388671875, "logps/rejected": -198.56187438964844, "loss": 0.4319, "rewards/accuracies": 0.875, "rewards/chosen": -0.03571777045726776, "rewards/margins": 1.2749719619750977, "rewards/rejected": -1.3106898069381714, "step": 669 }, { "epoch": 0.08, "learning_rate": 2.810722228725272e-07, "logits/chosen": -2.9319663047790527, "logits/rejected": -3.094832181930542, "logps/chosen": -232.2923583984375, "logps/rejected": -280.48388671875, "loss": 0.2578, "rewards/accuracies": 0.875, "rewards/chosen": 0.24395324289798737, "rewards/margins": 1.5181598663330078, "rewards/rejected": -1.2742066383361816, "step": 670 }, { "epoch": 0.08, "learning_rate": 2.8103710640290294e-07, "logits/chosen": -3.8010122776031494, "logits/rejected": -3.588047742843628, "logps/chosen": -234.400146484375, "logps/rejected": -213.72781372070312, "loss": 0.4428, "rewards/accuracies": 0.875, "rewards/chosen": -0.17712001502513885, "rewards/margins": 0.8781943321228027, "rewards/rejected": -1.055314302444458, "step": 671 }, { "epoch": 0.08, "learning_rate": 2.810019899332787e-07, "logits/chosen": -2.562882900238037, "logits/rejected": -2.899467945098877, "logps/chosen": -320.32635498046875, "logps/rejected": -225.9552001953125, "loss": 0.5401, "rewards/accuracies": 0.875, "rewards/chosen": 0.07994194328784943, "rewards/margins": 0.6259147524833679, "rewards/rejected": -0.5459728240966797, "step": 672 }, { "epoch": 0.08, "learning_rate": 2.8096687346365445e-07, "logits/chosen": -2.525270462036133, "logits/rejected": -2.663062810897827, "logps/chosen": -230.9478759765625, "logps/rejected": -269.2705078125, "loss": 0.4953, "rewards/accuracies": 0.75, "rewards/chosen": -0.06332116574048996, "rewards/margins": 0.8073590993881226, "rewards/rejected": -0.8706803321838379, "step": 673 }, { "epoch": 0.08, "learning_rate": 2.809317569940302e-07, "logits/chosen": -3.27494740486145, "logits/rejected": -3.249255657196045, "logps/chosen": -129.63888549804688, "logps/rejected": -192.5345916748047, "loss": 0.3029, "rewards/accuracies": 0.875, "rewards/chosen": 0.314860075712204, "rewards/margins": 1.5779260396957397, "rewards/rejected": -1.2630659341812134, "step": 674 }, { "epoch": 0.08, "learning_rate": 2.8089664052440595e-07, "logits/chosen": -2.46126389503479, "logits/rejected": -2.5478923320770264, "logps/chosen": -270.1539611816406, "logps/rejected": -316.5727844238281, "loss": 0.4632, "rewards/accuracies": 0.75, "rewards/chosen": 0.22709369659423828, "rewards/margins": 0.7511131167411804, "rewards/rejected": -0.5240194797515869, "step": 675 }, { "epoch": 0.08, "learning_rate": 2.8086152405478166e-07, "logits/chosen": -3.0591249465942383, "logits/rejected": -3.4650230407714844, "logps/chosen": -63.17518997192383, "logps/rejected": -184.40037536621094, "loss": 0.6821, "rewards/accuracies": 0.625, "rewards/chosen": -0.03870847821235657, "rewards/margins": 0.5994771718978882, "rewards/rejected": -0.6381855607032776, "step": 676 }, { "epoch": 0.08, "learning_rate": 2.808264075851574e-07, "logits/chosen": -3.387763261795044, "logits/rejected": -3.4069201946258545, "logps/chosen": -452.8389892578125, "logps/rejected": -309.1654968261719, "loss": 0.4548, "rewards/accuracies": 0.75, "rewards/chosen": -0.3142624795436859, "rewards/margins": 1.189105749130249, "rewards/rejected": -1.5033681392669678, "step": 677 }, { "epoch": 0.08, "learning_rate": 2.8079129111553316e-07, "logits/chosen": -3.295924425125122, "logits/rejected": -3.3116328716278076, "logps/chosen": -263.3892822265625, "logps/rejected": -186.02883911132812, "loss": 0.4234, "rewards/accuracies": 0.875, "rewards/chosen": -0.0004598647356033325, "rewards/margins": 0.947930097579956, "rewards/rejected": -0.9483898878097534, "step": 678 }, { "epoch": 0.08, "learning_rate": 2.807561746459089e-07, "logits/chosen": -3.211676597595215, "logits/rejected": -2.964092969894409, "logps/chosen": -282.63330078125, "logps/rejected": -376.3515319824219, "loss": 0.3642, "rewards/accuracies": 0.875, "rewards/chosen": 0.1897697150707245, "rewards/margins": 1.3087869882583618, "rewards/rejected": -1.1190173625946045, "step": 679 }, { "epoch": 0.08, "learning_rate": 2.8072105817628467e-07, "logits/chosen": -2.9880011081695557, "logits/rejected": -3.0205297470092773, "logps/chosen": -150.84628295898438, "logps/rejected": -176.31793212890625, "loss": 0.482, "rewards/accuracies": 0.75, "rewards/chosen": -0.04968436062335968, "rewards/margins": 1.008692741394043, "rewards/rejected": -1.0583771467208862, "step": 680 }, { "epoch": 0.08, "learning_rate": 2.806859417066604e-07, "logits/chosen": -3.131657600402832, "logits/rejected": -2.874061346054077, "logps/chosen": -100.8520736694336, "logps/rejected": -181.38697814941406, "loss": 0.3973, "rewards/accuracies": 0.75, "rewards/chosen": 0.08181305229663849, "rewards/margins": 1.0862394571304321, "rewards/rejected": -1.0044264793395996, "step": 681 }, { "epoch": 0.08, "learning_rate": 2.806508252370362e-07, "logits/chosen": -2.808809757232666, "logits/rejected": -2.70902943611145, "logps/chosen": -277.4212646484375, "logps/rejected": -341.1982421875, "loss": 0.373, "rewards/accuracies": 0.875, "rewards/chosen": 0.30684730410575867, "rewards/margins": 1.2745519876480103, "rewards/rejected": -0.9677046537399292, "step": 682 }, { "epoch": 0.08, "learning_rate": 2.8061570876741193e-07, "logits/chosen": -2.6040287017822266, "logits/rejected": -2.595909595489502, "logps/chosen": -240.48321533203125, "logps/rejected": -198.50997924804688, "loss": 0.3561, "rewards/accuracies": 1.0, "rewards/chosen": 0.2184903919696808, "rewards/margins": 1.0276191234588623, "rewards/rejected": -0.8091287612915039, "step": 683 }, { "epoch": 0.08, "learning_rate": 2.8058059229778763e-07, "logits/chosen": -2.8333404064178467, "logits/rejected": -2.4502015113830566, "logps/chosen": -219.761474609375, "logps/rejected": -142.65008544921875, "loss": 0.5321, "rewards/accuracies": 0.75, "rewards/chosen": 0.2704215943813324, "rewards/margins": 0.5716500878334045, "rewards/rejected": -0.30122846364974976, "step": 684 }, { "epoch": 0.08, "learning_rate": 2.805454758281634e-07, "logits/chosen": -2.5912113189697266, "logits/rejected": -2.7085680961608887, "logps/chosen": -411.77685546875, "logps/rejected": -289.8896484375, "loss": 0.4253, "rewards/accuracies": 0.875, "rewards/chosen": 0.2640058696269989, "rewards/margins": 1.0749235153198242, "rewards/rejected": -0.8109176158905029, "step": 685 }, { "epoch": 0.08, "learning_rate": 2.8051035935853914e-07, "logits/chosen": -2.515194892883301, "logits/rejected": -2.6483588218688965, "logps/chosen": -426.415283203125, "logps/rejected": -347.5091247558594, "loss": 0.3039, "rewards/accuracies": 1.0, "rewards/chosen": 0.005179956555366516, "rewards/margins": 1.3198150396347046, "rewards/rejected": -1.3146350383758545, "step": 686 }, { "epoch": 0.08, "learning_rate": 2.804752428889149e-07, "logits/chosen": -3.6294760704040527, "logits/rejected": -3.5103511810302734, "logps/chosen": -240.3109130859375, "logps/rejected": -221.80860900878906, "loss": 0.2843, "rewards/accuracies": 0.75, "rewards/chosen": 0.3888118863105774, "rewards/margins": 1.8055329322814941, "rewards/rejected": -1.4167211055755615, "step": 687 }, { "epoch": 0.08, "learning_rate": 2.8044012641929065e-07, "logits/chosen": -3.248447895050049, "logits/rejected": -3.404047727584839, "logps/chosen": -291.5246276855469, "logps/rejected": -197.08004760742188, "loss": 0.5457, "rewards/accuracies": 0.625, "rewards/chosen": 0.538979709148407, "rewards/margins": 1.1935218572616577, "rewards/rejected": -0.654542088508606, "step": 688 }, { "epoch": 0.08, "learning_rate": 2.8040500994966635e-07, "logits/chosen": -2.7433083057403564, "logits/rejected": -2.693399429321289, "logps/chosen": -216.10342407226562, "logps/rejected": -193.66622924804688, "loss": 0.535, "rewards/accuracies": 0.875, "rewards/chosen": 0.09604065120220184, "rewards/margins": 1.2432631254196167, "rewards/rejected": -1.1472225189208984, "step": 689 }, { "epoch": 0.08, "learning_rate": 2.8036989348004216e-07, "logits/chosen": -3.1248443126678467, "logits/rejected": -3.3194429874420166, "logps/chosen": -271.44677734375, "logps/rejected": -354.9962158203125, "loss": 0.3927, "rewards/accuracies": 0.75, "rewards/chosen": 0.19200202822685242, "rewards/margins": 1.6102817058563232, "rewards/rejected": -1.418279767036438, "step": 690 }, { "epoch": 0.08, "learning_rate": 2.803347770104179e-07, "logits/chosen": -3.6547465324401855, "logits/rejected": -3.507004737854004, "logps/chosen": -186.81564331054688, "logps/rejected": -220.38259887695312, "loss": 0.2707, "rewards/accuracies": 0.875, "rewards/chosen": -0.1647680252790451, "rewards/margins": 1.9419746398925781, "rewards/rejected": -2.1067428588867188, "step": 691 }, { "epoch": 0.08, "learning_rate": 2.802996605407936e-07, "logits/chosen": -3.6030077934265137, "logits/rejected": -3.2871081829071045, "logps/chosen": -316.1094970703125, "logps/rejected": -225.51097106933594, "loss": 0.5874, "rewards/accuracies": 0.5, "rewards/chosen": -0.08980468660593033, "rewards/margins": 0.6749645471572876, "rewards/rejected": -0.7647692561149597, "step": 692 }, { "epoch": 0.08, "learning_rate": 2.8026454407116936e-07, "logits/chosen": -3.0034432411193848, "logits/rejected": -3.0887794494628906, "logps/chosen": -272.9200134277344, "logps/rejected": -254.30731201171875, "loss": 0.6342, "rewards/accuracies": 0.875, "rewards/chosen": -0.28065866231918335, "rewards/margins": 0.8286210894584656, "rewards/rejected": -1.109279751777649, "step": 693 }, { "epoch": 0.08, "learning_rate": 2.802294276015451e-07, "logits/chosen": -3.42630672454834, "logits/rejected": -3.390458345413208, "logps/chosen": -166.72396850585938, "logps/rejected": -181.4721221923828, "loss": 0.3742, "rewards/accuracies": 0.875, "rewards/chosen": 0.010705262422561646, "rewards/margins": 1.5873644351959229, "rewards/rejected": -1.5766589641571045, "step": 694 }, { "epoch": 0.08, "learning_rate": 2.8019431113192087e-07, "logits/chosen": -3.5085535049438477, "logits/rejected": -3.1806631088256836, "logps/chosen": -335.67327880859375, "logps/rejected": -222.08627319335938, "loss": 0.7465, "rewards/accuracies": 0.5, "rewards/chosen": 0.11532177031040192, "rewards/margins": 0.6543084979057312, "rewards/rejected": -0.5389867424964905, "step": 695 }, { "epoch": 0.08, "learning_rate": 2.801591946622966e-07, "logits/chosen": -3.2428689002990723, "logits/rejected": -3.399885654449463, "logps/chosen": -263.11761474609375, "logps/rejected": -248.46810913085938, "loss": 0.4727, "rewards/accuracies": 0.875, "rewards/chosen": 0.12523208558559418, "rewards/margins": 0.6779493689537048, "rewards/rejected": -0.5527173280715942, "step": 696 }, { "epoch": 0.08, "learning_rate": 2.8012407819267233e-07, "logits/chosen": -2.223513603210449, "logits/rejected": -2.4811928272247314, "logps/chosen": -328.03662109375, "logps/rejected": -389.5274963378906, "loss": 0.3393, "rewards/accuracies": 0.75, "rewards/chosen": 0.1013520210981369, "rewards/margins": 1.3311753273010254, "rewards/rejected": -1.229823350906372, "step": 697 }, { "epoch": 0.08, "learning_rate": 2.800889617230481e-07, "logits/chosen": -3.5635762214660645, "logits/rejected": -3.4025607109069824, "logps/chosen": -113.25352478027344, "logps/rejected": -212.2552947998047, "loss": 0.3372, "rewards/accuracies": 0.875, "rewards/chosen": -0.061311714351177216, "rewards/margins": 1.4127999544143677, "rewards/rejected": -1.4741116762161255, "step": 698 }, { "epoch": 0.08, "learning_rate": 2.8005384525342383e-07, "logits/chosen": -2.7048871517181396, "logits/rejected": -2.554380416870117, "logps/chosen": -352.5461730957031, "logps/rejected": -265.97833251953125, "loss": 0.542, "rewards/accuracies": 0.5, "rewards/chosen": 0.3755878210067749, "rewards/margins": 0.6865990161895752, "rewards/rejected": -0.3110112249851227, "step": 699 }, { "epoch": 0.08, "learning_rate": 2.800187287837996e-07, "logits/chosen": -2.2965357303619385, "logits/rejected": -1.9074287414550781, "logps/chosen": -199.3422088623047, "logps/rejected": -313.6127624511719, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": -0.23939643800258636, "rewards/margins": 0.7571967840194702, "rewards/rejected": -0.996593177318573, "step": 700 }, { "epoch": 0.08, "learning_rate": 2.7998361231417534e-07, "logits/chosen": -3.5383293628692627, "logits/rejected": -3.5414085388183594, "logps/chosen": -308.8039855957031, "logps/rejected": -323.1033935546875, "loss": 0.4573, "rewards/accuracies": 0.875, "rewards/chosen": 0.06360204517841339, "rewards/margins": 1.2177666425704956, "rewards/rejected": -1.1541645526885986, "step": 701 }, { "epoch": 0.08, "learning_rate": 2.7994849584455104e-07, "logits/chosen": -3.158003807067871, "logits/rejected": -3.127227783203125, "logps/chosen": -330.5771484375, "logps/rejected": -277.0057067871094, "loss": 0.6162, "rewards/accuracies": 0.625, "rewards/chosen": -0.039095744490623474, "rewards/margins": 0.6411279439926147, "rewards/rejected": -0.6802237033843994, "step": 702 }, { "epoch": 0.08, "learning_rate": 2.7991337937492685e-07, "logits/chosen": -3.391720771789551, "logits/rejected": -3.5536458492279053, "logps/chosen": -149.1319122314453, "logps/rejected": -252.89488220214844, "loss": 0.5645, "rewards/accuracies": 0.75, "rewards/chosen": 0.03919847309589386, "rewards/margins": 1.124940276145935, "rewards/rejected": -1.0857417583465576, "step": 703 }, { "epoch": 0.08, "learning_rate": 2.798782629053026e-07, "logits/chosen": -3.42157244682312, "logits/rejected": -3.7671823501586914, "logps/chosen": -261.8943176269531, "logps/rejected": -336.5327453613281, "loss": 0.3204, "rewards/accuracies": 1.0, "rewards/chosen": 0.0694407969713211, "rewards/margins": 1.4619674682617188, "rewards/rejected": -1.392526626586914, "step": 704 }, { "epoch": 0.08, "learning_rate": 2.798431464356783e-07, "logits/chosen": -3.438798427581787, "logits/rejected": -3.211738109588623, "logps/chosen": -222.43499755859375, "logps/rejected": -184.21075439453125, "loss": 0.672, "rewards/accuracies": 0.375, "rewards/chosen": -0.011085599660873413, "rewards/margins": 0.3730858564376831, "rewards/rejected": -0.38417142629623413, "step": 705 }, { "epoch": 0.08, "learning_rate": 2.7980802996605406e-07, "logits/chosen": -3.0592851638793945, "logits/rejected": -2.938581705093384, "logps/chosen": -169.57278442382812, "logps/rejected": -296.74969482421875, "loss": 0.2689, "rewards/accuracies": 1.0, "rewards/chosen": 0.3602779507637024, "rewards/margins": 1.771864414215088, "rewards/rejected": -1.4115862846374512, "step": 706 }, { "epoch": 0.08, "learning_rate": 2.797729134964298e-07, "logits/chosen": -2.977429151535034, "logits/rejected": -3.1957926750183105, "logps/chosen": -262.33929443359375, "logps/rejected": -203.01358032226562, "loss": 0.6024, "rewards/accuracies": 0.625, "rewards/chosen": -0.3755205273628235, "rewards/margins": 0.6346362233161926, "rewards/rejected": -1.0101567506790161, "step": 707 }, { "epoch": 0.08, "learning_rate": 2.7973779702680557e-07, "logits/chosen": -3.205326557159424, "logits/rejected": -3.0857486724853516, "logps/chosen": -398.4686279296875, "logps/rejected": -302.9078369140625, "loss": 0.4013, "rewards/accuracies": 0.625, "rewards/chosen": 0.14540919661521912, "rewards/margins": 1.6690673828125, "rewards/rejected": -1.5236581563949585, "step": 708 }, { "epoch": 0.08, "learning_rate": 2.797026805571813e-07, "logits/chosen": -2.898405075073242, "logits/rejected": -2.9792308807373047, "logps/chosen": -131.74102783203125, "logps/rejected": -299.36651611328125, "loss": 0.3555, "rewards/accuracies": 0.875, "rewards/chosen": -0.35862088203430176, "rewards/margins": 1.3310527801513672, "rewards/rejected": -1.6896735429763794, "step": 709 }, { "epoch": 0.08, "learning_rate": 2.79667564087557e-07, "logits/chosen": -2.7953314781188965, "logits/rejected": -2.966688394546509, "logps/chosen": -142.406494140625, "logps/rejected": -241.51358032226562, "loss": 0.6928, "rewards/accuracies": 0.625, "rewards/chosen": -0.19121915102005005, "rewards/margins": 0.42608654499053955, "rewards/rejected": -0.6173057556152344, "step": 710 }, { "epoch": 0.08, "learning_rate": 2.796324476179328e-07, "logits/chosen": -3.5614981651306152, "logits/rejected": -3.113609790802002, "logps/chosen": -528.8561401367188, "logps/rejected": -230.63150024414062, "loss": 0.6038, "rewards/accuracies": 0.75, "rewards/chosen": -0.24319688975811005, "rewards/margins": 0.9700608253479004, "rewards/rejected": -1.2132576704025269, "step": 711 }, { "epoch": 0.08, "learning_rate": 2.795973311483086e-07, "logits/chosen": -3.450883388519287, "logits/rejected": -3.1953043937683105, "logps/chosen": -177.04537963867188, "logps/rejected": -173.73739624023438, "loss": 0.4407, "rewards/accuracies": 0.875, "rewards/chosen": -0.025586768984794617, "rewards/margins": 1.3895102739334106, "rewards/rejected": -1.4150969982147217, "step": 712 }, { "epoch": 0.08, "learning_rate": 2.795622146786843e-07, "logits/chosen": -3.022975444793701, "logits/rejected": -3.166229724884033, "logps/chosen": -265.0945739746094, "logps/rejected": -336.5422668457031, "loss": 0.3947, "rewards/accuracies": 0.875, "rewards/chosen": -0.2782943844795227, "rewards/margins": 1.6208069324493408, "rewards/rejected": -1.8991012573242188, "step": 713 }, { "epoch": 0.08, "learning_rate": 2.7952709820906004e-07, "logits/chosen": -2.9518818855285645, "logits/rejected": -2.85339093208313, "logps/chosen": -225.41339111328125, "logps/rejected": -264.4231262207031, "loss": 0.3502, "rewards/accuracies": 0.875, "rewards/chosen": 0.2141304910182953, "rewards/margins": 1.193411946296692, "rewards/rejected": -0.979281485080719, "step": 714 }, { "epoch": 0.08, "learning_rate": 2.794919817394358e-07, "logits/chosen": -3.4671685695648193, "logits/rejected": -3.010868549346924, "logps/chosen": -266.54180908203125, "logps/rejected": -168.47796630859375, "loss": 0.4375, "rewards/accuracies": 0.875, "rewards/chosen": -0.10000243037939072, "rewards/margins": 0.7507315874099731, "rewards/rejected": -0.8507339954376221, "step": 715 }, { "epoch": 0.08, "learning_rate": 2.7945686526981154e-07, "logits/chosen": -3.561511278152466, "logits/rejected": -3.179701566696167, "logps/chosen": -228.364990234375, "logps/rejected": -167.16336059570312, "loss": 0.553, "rewards/accuracies": 0.5, "rewards/chosen": -0.023554518818855286, "rewards/margins": 0.8202447891235352, "rewards/rejected": -0.843799352645874, "step": 716 }, { "epoch": 0.08, "learning_rate": 2.794217488001873e-07, "logits/chosen": -3.2105660438537598, "logits/rejected": -3.1001129150390625, "logps/chosen": -235.6729736328125, "logps/rejected": -245.31349182128906, "loss": 0.3792, "rewards/accuracies": 1.0, "rewards/chosen": -0.21252159774303436, "rewards/margins": 1.2790400981903076, "rewards/rejected": -1.491561770439148, "step": 717 }, { "epoch": 0.08, "learning_rate": 2.79386632330563e-07, "logits/chosen": -2.600304126739502, "logits/rejected": -2.3475260734558105, "logps/chosen": -458.005126953125, "logps/rejected": -353.92022705078125, "loss": 0.6151, "rewards/accuracies": 0.5, "rewards/chosen": -0.21235902607440948, "rewards/margins": 0.329931378364563, "rewards/rejected": -0.5422903895378113, "step": 718 }, { "epoch": 0.08, "learning_rate": 2.7935151586093875e-07, "logits/chosen": -2.723416328430176, "logits/rejected": -2.1372413635253906, "logps/chosen": -479.31402587890625, "logps/rejected": -422.8626403808594, "loss": 0.5397, "rewards/accuracies": 0.625, "rewards/chosen": 0.060451313853263855, "rewards/margins": 0.7133290767669678, "rewards/rejected": -0.6528778672218323, "step": 719 }, { "epoch": 0.08, "learning_rate": 2.793163993913145e-07, "logits/chosen": -3.8497650623321533, "logits/rejected": -4.18699836730957, "logps/chosen": -354.9637451171875, "logps/rejected": -426.37371826171875, "loss": 0.4274, "rewards/accuracies": 0.75, "rewards/chosen": 0.13541057705879211, "rewards/margins": 1.0846930742263794, "rewards/rejected": -0.9492824673652649, "step": 720 }, { "epoch": 0.08, "learning_rate": 2.7928128292169026e-07, "logits/chosen": -3.0595836639404297, "logits/rejected": -2.864090919494629, "logps/chosen": -116.8880615234375, "logps/rejected": -276.19677734375, "loss": 0.4279, "rewards/accuracies": 0.75, "rewards/chosen": 0.0646720826625824, "rewards/margins": 1.173906922340393, "rewards/rejected": -1.1092348098754883, "step": 721 }, { "epoch": 0.08, "learning_rate": 2.79246166452066e-07, "logits/chosen": -3.027130365371704, "logits/rejected": -2.8423495292663574, "logps/chosen": -326.50274658203125, "logps/rejected": -260.4288330078125, "loss": 1.2141, "rewards/accuracies": 0.625, "rewards/chosen": -0.9012296795845032, "rewards/margins": -0.12403631210327148, "rewards/rejected": -0.7771934866905212, "step": 722 }, { "epoch": 0.08, "learning_rate": 2.792110499824417e-07, "logits/chosen": -2.9410150051116943, "logits/rejected": -2.9808695316314697, "logps/chosen": -338.3072204589844, "logps/rejected": -221.28509521484375, "loss": 0.5662, "rewards/accuracies": 0.625, "rewards/chosen": -0.06646937876939774, "rewards/margins": 0.5223274827003479, "rewards/rejected": -0.588796854019165, "step": 723 }, { "epoch": 0.08, "learning_rate": 2.791759335128175e-07, "logits/chosen": -3.389832019805908, "logits/rejected": -3.607801914215088, "logps/chosen": -184.72640991210938, "logps/rejected": -191.76133728027344, "loss": 0.4964, "rewards/accuracies": 0.875, "rewards/chosen": -0.13443441689014435, "rewards/margins": 0.8097478151321411, "rewards/rejected": -0.944182276725769, "step": 724 }, { "epoch": 0.08, "learning_rate": 2.791408170431933e-07, "logits/chosen": -3.7416253089904785, "logits/rejected": -3.840014696121216, "logps/chosen": -194.82476806640625, "logps/rejected": -185.4194793701172, "loss": 0.5016, "rewards/accuracies": 0.875, "rewards/chosen": -0.45439350605010986, "rewards/margins": 0.9588402509689331, "rewards/rejected": -1.413233757019043, "step": 725 }, { "epoch": 0.08, "learning_rate": 2.79105700573569e-07, "logits/chosen": -3.260671854019165, "logits/rejected": -3.1522653102874756, "logps/chosen": -320.48919677734375, "logps/rejected": -305.313720703125, "loss": 0.4002, "rewards/accuracies": 1.0, "rewards/chosen": -0.06286229938268661, "rewards/margins": 0.8737356066703796, "rewards/rejected": -0.9365978837013245, "step": 726 }, { "epoch": 0.08, "learning_rate": 2.7907058410394473e-07, "logits/chosen": -3.0532050132751465, "logits/rejected": -3.4129347801208496, "logps/chosen": -195.68765258789062, "logps/rejected": -307.7918701171875, "loss": 0.3589, "rewards/accuracies": 1.0, "rewards/chosen": 0.39316853880882263, "rewards/margins": 1.3101814985275269, "rewards/rejected": -0.9170130491256714, "step": 727 }, { "epoch": 0.08, "learning_rate": 2.790354676343205e-07, "logits/chosen": -2.940598487854004, "logits/rejected": -3.1966962814331055, "logps/chosen": -177.54449462890625, "logps/rejected": -275.5166015625, "loss": 0.4473, "rewards/accuracies": 0.75, "rewards/chosen": 0.1405424028635025, "rewards/margins": 1.4285423755645752, "rewards/rejected": -1.2879998683929443, "step": 728 }, { "epoch": 0.08, "learning_rate": 2.7900035116469624e-07, "logits/chosen": -3.1877589225769043, "logits/rejected": -2.766432285308838, "logps/chosen": -304.5781555175781, "logps/rejected": -265.7191467285156, "loss": 0.6039, "rewards/accuracies": 0.75, "rewards/chosen": -0.2719941735267639, "rewards/margins": 0.4328400194644928, "rewards/rejected": -0.7048341631889343, "step": 729 }, { "epoch": 0.08, "learning_rate": 2.78965234695072e-07, "logits/chosen": -3.326904773712158, "logits/rejected": -3.3223938941955566, "logps/chosen": -368.2452697753906, "logps/rejected": -283.09893798828125, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": 0.4378724694252014, "rewards/margins": 1.6837018728256226, "rewards/rejected": -1.2458293437957764, "step": 730 }, { "epoch": 0.08, "learning_rate": 2.789301182254477e-07, "logits/chosen": -3.4446797370910645, "logits/rejected": -3.1093382835388184, "logps/chosen": -360.2107849121094, "logps/rejected": -256.012451171875, "loss": 0.4546, "rewards/accuracies": 0.875, "rewards/chosen": -0.02461787313222885, "rewards/margins": 0.7881884574890137, "rewards/rejected": -0.8128063678741455, "step": 731 }, { "epoch": 0.08, "learning_rate": 2.7889500175582345e-07, "logits/chosen": -2.9737820625305176, "logits/rejected": -2.883054256439209, "logps/chosen": -416.96136474609375, "logps/rejected": -240.3757781982422, "loss": 0.6519, "rewards/accuracies": 0.625, "rewards/chosen": -0.05331142991781235, "rewards/margins": 0.8406397104263306, "rewards/rejected": -0.8939511775970459, "step": 732 }, { "epoch": 0.08, "learning_rate": 2.788598852861992e-07, "logits/chosen": -3.5305378437042236, "logits/rejected": -3.084609031677246, "logps/chosen": -303.6122741699219, "logps/rejected": -183.35679626464844, "loss": 0.5426, "rewards/accuracies": 1.0, "rewards/chosen": -0.3534061312675476, "rewards/margins": 0.3605599105358124, "rewards/rejected": -0.7139660120010376, "step": 733 }, { "epoch": 0.08, "learning_rate": 2.7882476881657496e-07, "logits/chosen": -3.27215313911438, "logits/rejected": -3.041658878326416, "logps/chosen": -175.80950927734375, "logps/rejected": -137.91656494140625, "loss": 0.8296, "rewards/accuracies": 0.5, "rewards/chosen": -0.22282934188842773, "rewards/margins": -0.014165259897708893, "rewards/rejected": -0.20866408944129944, "step": 734 }, { "epoch": 0.08, "learning_rate": 2.787896523469507e-07, "logits/chosen": -2.7333507537841797, "logits/rejected": -2.6591265201568604, "logps/chosen": -206.57362365722656, "logps/rejected": -254.91778564453125, "loss": 0.4769, "rewards/accuracies": 0.75, "rewards/chosen": 0.24605610966682434, "rewards/margins": 0.69692462682724, "rewards/rejected": -0.45086848735809326, "step": 735 }, { "epoch": 0.08, "learning_rate": 2.7875453587732646e-07, "logits/chosen": -3.360931873321533, "logits/rejected": -3.6438777446746826, "logps/chosen": -224.9855499267578, "logps/rejected": -209.10198974609375, "loss": 0.284, "rewards/accuracies": 0.875, "rewards/chosen": -0.023202970623970032, "rewards/margins": 1.805598258972168, "rewards/rejected": -1.8288013935089111, "step": 736 }, { "epoch": 0.08, "learning_rate": 2.787194194077022e-07, "logits/chosen": -3.1314797401428223, "logits/rejected": -3.0882620811462402, "logps/chosen": -220.35833740234375, "logps/rejected": -139.05125427246094, "loss": 0.6041, "rewards/accuracies": 0.75, "rewards/chosen": -0.17225147783756256, "rewards/margins": 0.3240305483341217, "rewards/rejected": -0.49628201127052307, "step": 737 }, { "epoch": 0.09, "learning_rate": 2.7868430293807797e-07, "logits/chosen": -3.308011054992676, "logits/rejected": -3.3963537216186523, "logps/chosen": -191.07635498046875, "logps/rejected": -203.13839721679688, "loss": 0.6503, "rewards/accuracies": 0.5, "rewards/chosen": -0.41069403290748596, "rewards/margins": 0.56345534324646, "rewards/rejected": -0.9741493463516235, "step": 738 }, { "epoch": 0.09, "learning_rate": 2.7864918646845367e-07, "logits/chosen": -3.430941104888916, "logits/rejected": -3.525658369064331, "logps/chosen": -92.60293579101562, "logps/rejected": -176.57913208007812, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -0.29544633626937866, "rewards/margins": 1.10063898563385, "rewards/rejected": -1.396085262298584, "step": 739 }, { "epoch": 0.09, "learning_rate": 2.786140699988294e-07, "logits/chosen": -3.457493305206299, "logits/rejected": -3.5594327449798584, "logps/chosen": -176.48135375976562, "logps/rejected": -204.83287048339844, "loss": 0.514, "rewards/accuracies": 0.875, "rewards/chosen": -0.2704823613166809, "rewards/margins": 0.49225959181785583, "rewards/rejected": -0.7627419233322144, "step": 740 }, { "epoch": 0.09, "learning_rate": 2.785789535292052e-07, "logits/chosen": -3.106994152069092, "logits/rejected": -3.050036907196045, "logps/chosen": -262.15106201171875, "logps/rejected": -221.78695678710938, "loss": 0.3798, "rewards/accuracies": 0.875, "rewards/chosen": 0.23475639522075653, "rewards/margins": 1.4000234603881836, "rewards/rejected": -1.1652668714523315, "step": 741 }, { "epoch": 0.09, "learning_rate": 2.7854383705958093e-07, "logits/chosen": -2.7205018997192383, "logits/rejected": -2.717792272567749, "logps/chosen": -309.83148193359375, "logps/rejected": -224.51214599609375, "loss": 0.564, "rewards/accuracies": 0.5, "rewards/chosen": 0.23166640102863312, "rewards/margins": 0.39010512828826904, "rewards/rejected": -0.15843872725963593, "step": 742 }, { "epoch": 0.09, "learning_rate": 2.785087205899567e-07, "logits/chosen": -3.3515236377716064, "logits/rejected": -3.4007985591888428, "logps/chosen": -157.39659118652344, "logps/rejected": -142.01329040527344, "loss": 0.4733, "rewards/accuracies": 0.75, "rewards/chosen": 0.08005556464195251, "rewards/margins": 0.6852942109107971, "rewards/rejected": -0.605238676071167, "step": 743 }, { "epoch": 0.09, "learning_rate": 2.7847360412033244e-07, "logits/chosen": -3.5198566913604736, "logits/rejected": -3.41941499710083, "logps/chosen": -394.3213806152344, "logps/rejected": -339.6938781738281, "loss": 0.2811, "rewards/accuracies": 0.875, "rewards/chosen": 0.6189172267913818, "rewards/margins": 1.63225519657135, "rewards/rejected": -1.0133379697799683, "step": 744 }, { "epoch": 0.09, "learning_rate": 2.7843848765070814e-07, "logits/chosen": -2.3034324645996094, "logits/rejected": -2.556891679763794, "logps/chosen": -265.40338134765625, "logps/rejected": -321.1072082519531, "loss": 0.3482, "rewards/accuracies": 1.0, "rewards/chosen": 0.35625535249710083, "rewards/margins": 1.2761567831039429, "rewards/rejected": -0.9199013710021973, "step": 745 }, { "epoch": 0.09, "learning_rate": 2.7840337118108395e-07, "logits/chosen": -3.111315965652466, "logits/rejected": -3.149352550506592, "logps/chosen": -209.0286102294922, "logps/rejected": -259.8973693847656, "loss": 0.4053, "rewards/accuracies": 0.75, "rewards/chosen": -0.27739983797073364, "rewards/margins": 1.4222211837768555, "rewards/rejected": -1.6996210813522339, "step": 746 }, { "epoch": 0.09, "learning_rate": 2.7836825471145965e-07, "logits/chosen": -3.2193336486816406, "logits/rejected": -3.359908103942871, "logps/chosen": -318.5334167480469, "logps/rejected": -292.0082702636719, "loss": 0.2583, "rewards/accuracies": 1.0, "rewards/chosen": -0.08901204913854599, "rewards/margins": 1.8340709209442139, "rewards/rejected": -1.923082947731018, "step": 747 }, { "epoch": 0.09, "learning_rate": 2.783331382418354e-07, "logits/chosen": -3.4616434574127197, "logits/rejected": -3.588042736053467, "logps/chosen": -270.0181884765625, "logps/rejected": -267.8734130859375, "loss": 0.5003, "rewards/accuracies": 0.75, "rewards/chosen": 0.2925337851047516, "rewards/margins": 1.0193285942077637, "rewards/rejected": -0.7267947793006897, "step": 748 }, { "epoch": 0.09, "learning_rate": 2.7829802177221116e-07, "logits/chosen": -2.8892769813537598, "logits/rejected": -2.9554972648620605, "logps/chosen": -434.30645751953125, "logps/rejected": -344.25494384765625, "loss": 0.5749, "rewards/accuracies": 0.625, "rewards/chosen": -0.3680606782436371, "rewards/margins": 0.4565792381763458, "rewards/rejected": -0.8246399164199829, "step": 749 }, { "epoch": 0.09, "learning_rate": 2.782629053025869e-07, "logits/chosen": -3.30849027633667, "logits/rejected": -3.4568932056427, "logps/chosen": -221.63856506347656, "logps/rejected": -243.6094970703125, "loss": 0.3395, "rewards/accuracies": 0.875, "rewards/chosen": 0.4739278256893158, "rewards/margins": 1.5499975681304932, "rewards/rejected": -1.076069712638855, "step": 750 }, { "epoch": 0.09, "learning_rate": 2.7822778883296266e-07, "logits/chosen": -3.2489686012268066, "logits/rejected": -3.207798957824707, "logps/chosen": -320.4010314941406, "logps/rejected": -239.68475341796875, "loss": 0.3593, "rewards/accuracies": 0.75, "rewards/chosen": 0.23952628672122955, "rewards/margins": 1.6683579683303833, "rewards/rejected": -1.428831696510315, "step": 751 }, { "epoch": 0.09, "learning_rate": 2.7819267236333837e-07, "logits/chosen": -2.9491050243377686, "logits/rejected": -2.9428305625915527, "logps/chosen": -303.6954345703125, "logps/rejected": -190.91000366210938, "loss": 0.5354, "rewards/accuracies": 0.75, "rewards/chosen": -0.04244603216648102, "rewards/margins": 0.6577991843223572, "rewards/rejected": -0.700245201587677, "step": 752 }, { "epoch": 0.09, "learning_rate": 2.781575558937141e-07, "logits/chosen": -3.8878326416015625, "logits/rejected": -3.841360092163086, "logps/chosen": -234.3267059326172, "logps/rejected": -201.7291259765625, "loss": 0.6764, "rewards/accuracies": 0.375, "rewards/chosen": -0.09288483113050461, "rewards/margins": 0.6000270247459412, "rewards/rejected": -0.6929118037223816, "step": 753 }, { "epoch": 0.09, "learning_rate": 2.7812243942408987e-07, "logits/chosen": -2.64681077003479, "logits/rejected": -2.630861520767212, "logps/chosen": -214.25994873046875, "logps/rejected": -235.4342498779297, "loss": 0.4287, "rewards/accuracies": 0.625, "rewards/chosen": -0.15708193182945251, "rewards/margins": 0.9682724475860596, "rewards/rejected": -1.1253544092178345, "step": 754 }, { "epoch": 0.09, "learning_rate": 2.7808732295446563e-07, "logits/chosen": -2.664579391479492, "logits/rejected": -2.812045097351074, "logps/chosen": -235.59559631347656, "logps/rejected": -349.33685302734375, "loss": 0.4064, "rewards/accuracies": 0.875, "rewards/chosen": 0.20749445259571075, "rewards/margins": 1.0535074472427368, "rewards/rejected": -0.8460130095481873, "step": 755 }, { "epoch": 0.09, "learning_rate": 2.780522064848414e-07, "logits/chosen": -2.9259274005889893, "logits/rejected": -3.1783413887023926, "logps/chosen": -299.782470703125, "logps/rejected": -205.81707763671875, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": 0.4122794270515442, "rewards/margins": 1.5904133319854736, "rewards/rejected": -1.1781339645385742, "step": 756 }, { "epoch": 0.09, "learning_rate": 2.7801709001521714e-07, "logits/chosen": -3.840954303741455, "logits/rejected": -3.6058359146118164, "logps/chosen": -572.851318359375, "logps/rejected": -389.96197509765625, "loss": 0.4093, "rewards/accuracies": 0.75, "rewards/chosen": 0.19878225028514862, "rewards/margins": 1.4587445259094238, "rewards/rejected": -1.2599623203277588, "step": 757 }, { "epoch": 0.09, "learning_rate": 2.779819735455929e-07, "logits/chosen": -2.9931206703186035, "logits/rejected": -3.1358203887939453, "logps/chosen": -198.49517822265625, "logps/rejected": -294.4404602050781, "loss": 0.4641, "rewards/accuracies": 0.75, "rewards/chosen": -0.4661659598350525, "rewards/margins": 0.8074597716331482, "rewards/rejected": -1.2736257314682007, "step": 758 }, { "epoch": 0.09, "learning_rate": 2.7794685707596864e-07, "logits/chosen": -2.944542407989502, "logits/rejected": -2.916090250015259, "logps/chosen": -337.2884216308594, "logps/rejected": -289.6893310546875, "loss": 0.5873, "rewards/accuracies": 0.5, "rewards/chosen": -0.41707825660705566, "rewards/margins": 0.585429847240448, "rewards/rejected": -1.0025081634521484, "step": 759 }, { "epoch": 0.09, "learning_rate": 2.7791174060634434e-07, "logits/chosen": -2.286489725112915, "logits/rejected": -2.2807350158691406, "logps/chosen": -305.55633544921875, "logps/rejected": -229.83505249023438, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": 0.4459977149963379, "rewards/margins": 0.5031130909919739, "rewards/rejected": -0.05711541324853897, "step": 760 }, { "epoch": 0.09, "learning_rate": 2.778766241367201e-07, "logits/chosen": -3.200603485107422, "logits/rejected": -3.091647148132324, "logps/chosen": -225.74549865722656, "logps/rejected": -238.61561584472656, "loss": 0.4729, "rewards/accuracies": 0.875, "rewards/chosen": 0.16022247076034546, "rewards/margins": 0.7869634032249451, "rewards/rejected": -0.6267409920692444, "step": 761 }, { "epoch": 0.09, "learning_rate": 2.7784150766709585e-07, "logits/chosen": -2.5710701942443848, "logits/rejected": -2.792001724243164, "logps/chosen": -530.7432250976562, "logps/rejected": -633.0314331054688, "loss": 0.4267, "rewards/accuracies": 0.75, "rewards/chosen": 0.0067253075540065765, "rewards/margins": 1.1828887462615967, "rewards/rejected": -1.1761634349822998, "step": 762 }, { "epoch": 0.09, "learning_rate": 2.778063911974716e-07, "logits/chosen": -2.9660983085632324, "logits/rejected": -2.9491803646087646, "logps/chosen": -362.95819091796875, "logps/rejected": -312.9381103515625, "loss": 0.5217, "rewards/accuracies": 0.75, "rewards/chosen": -0.3714291453361511, "rewards/margins": 0.9204393625259399, "rewards/rejected": -1.2918685674667358, "step": 763 }, { "epoch": 0.09, "learning_rate": 2.7777127472784736e-07, "logits/chosen": -2.3638367652893066, "logits/rejected": -2.79617977142334, "logps/chosen": -272.8638000488281, "logps/rejected": -167.12689208984375, "loss": 0.7992, "rewards/accuracies": 0.5, "rewards/chosen": -0.24792566895484924, "rewards/margins": -0.1430899202823639, "rewards/rejected": -0.10483574867248535, "step": 764 }, { "epoch": 0.09, "learning_rate": 2.777361582582231e-07, "logits/chosen": -2.7295498847961426, "logits/rejected": -2.5084145069122314, "logps/chosen": -169.173095703125, "logps/rejected": -257.80841064453125, "loss": 0.4239, "rewards/accuracies": 0.875, "rewards/chosen": 0.20656678080558777, "rewards/margins": 1.0060412883758545, "rewards/rejected": -0.7994745373725891, "step": 765 }, { "epoch": 0.09, "learning_rate": 2.777010417885988e-07, "logits/chosen": -2.94695782661438, "logits/rejected": -3.0802979469299316, "logps/chosen": -378.9036560058594, "logps/rejected": -298.5536193847656, "loss": 0.5195, "rewards/accuracies": 0.75, "rewards/chosen": -0.37140989303588867, "rewards/margins": 0.9030638933181763, "rewards/rejected": -1.274473786354065, "step": 766 }, { "epoch": 0.09, "learning_rate": 2.7766592531897457e-07, "logits/chosen": -2.8654913902282715, "logits/rejected": -2.941662311553955, "logps/chosen": -303.6979675292969, "logps/rejected": -182.4124755859375, "loss": 0.3754, "rewards/accuracies": 0.875, "rewards/chosen": 0.09726090729236603, "rewards/margins": 1.0909184217453003, "rewards/rejected": -0.9936575293540955, "step": 767 }, { "epoch": 0.09, "learning_rate": 2.776308088493503e-07, "logits/chosen": -2.7466251850128174, "logits/rejected": -2.6715352535247803, "logps/chosen": -521.7222290039062, "logps/rejected": -291.4895324707031, "loss": 0.4713, "rewards/accuracies": 0.875, "rewards/chosen": 0.0589589886367321, "rewards/margins": 1.0211752653121948, "rewards/rejected": -0.9622161984443665, "step": 768 }, { "epoch": 0.09, "learning_rate": 2.775956923797261e-07, "logits/chosen": -3.3884201049804688, "logits/rejected": -3.282735586166382, "logps/chosen": -343.5610656738281, "logps/rejected": -191.45126342773438, "loss": 0.649, "rewards/accuracies": 0.625, "rewards/chosen": -0.16660653054714203, "rewards/margins": 0.46771326661109924, "rewards/rejected": -0.6343197822570801, "step": 769 }, { "epoch": 0.09, "learning_rate": 2.7756057591010183e-07, "logits/chosen": -2.86013126373291, "logits/rejected": -2.6618406772613525, "logps/chosen": -291.9141845703125, "logps/rejected": -374.5934753417969, "loss": 0.4966, "rewards/accuracies": 0.625, "rewards/chosen": 0.05318525433540344, "rewards/margins": 0.9221298694610596, "rewards/rejected": -0.8689446449279785, "step": 770 }, { "epoch": 0.09, "learning_rate": 2.775254594404776e-07, "logits/chosen": -2.764878273010254, "logits/rejected": -2.7415854930877686, "logps/chosen": -265.850341796875, "logps/rejected": -228.15701293945312, "loss": 0.7045, "rewards/accuracies": 0.5, "rewards/chosen": -0.4472038447856903, "rewards/margins": 0.4980015158653259, "rewards/rejected": -0.9452053308486938, "step": 771 }, { "epoch": 0.09, "learning_rate": 2.7749034297085334e-07, "logits/chosen": -3.147306442260742, "logits/rejected": -3.313295841217041, "logps/chosen": -233.81837463378906, "logps/rejected": -319.2016906738281, "loss": 0.3005, "rewards/accuracies": 1.0, "rewards/chosen": 0.408542275428772, "rewards/margins": 1.486729621887207, "rewards/rejected": -1.0781872272491455, "step": 772 }, { "epoch": 0.09, "learning_rate": 2.774552265012291e-07, "logits/chosen": -2.7149100303649902, "logits/rejected": -3.296889066696167, "logps/chosen": -266.28759765625, "logps/rejected": -227.50338745117188, "loss": 0.4875, "rewards/accuracies": 0.875, "rewards/chosen": 0.14019201695919037, "rewards/margins": 1.007714033126831, "rewards/rejected": -0.8675219416618347, "step": 773 }, { "epoch": 0.09, "learning_rate": 2.774201100316048e-07, "logits/chosen": -3.036712646484375, "logits/rejected": -2.9754140377044678, "logps/chosen": -404.80889892578125, "logps/rejected": -301.0320739746094, "loss": 0.4199, "rewards/accuracies": 0.875, "rewards/chosen": -0.09466972202062607, "rewards/margins": 0.7925654649734497, "rewards/rejected": -0.8872352242469788, "step": 774 }, { "epoch": 0.09, "learning_rate": 2.7738499356198055e-07, "logits/chosen": -2.9769539833068848, "logits/rejected": -3.0576791763305664, "logps/chosen": -347.3688659667969, "logps/rejected": -181.9683837890625, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -0.24589025974273682, "rewards/margins": 0.09954608976840973, "rewards/rejected": -0.34543633460998535, "step": 775 }, { "epoch": 0.09, "learning_rate": 2.773498770923563e-07, "logits/chosen": -2.6938247680664062, "logits/rejected": -2.8581957817077637, "logps/chosen": -228.93560791015625, "logps/rejected": -282.1071472167969, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": 0.45766735076904297, "rewards/margins": 2.0347132682800293, "rewards/rejected": -1.5770459175109863, "step": 776 }, { "epoch": 0.09, "learning_rate": 2.7731476062273205e-07, "logits/chosen": -2.81034517288208, "logits/rejected": -2.6404664516448975, "logps/chosen": -343.42724609375, "logps/rejected": -247.49749755859375, "loss": 0.7535, "rewards/accuracies": 0.5, "rewards/chosen": -0.1953914314508438, "rewards/margins": 0.09828576445579529, "rewards/rejected": -0.2936772108078003, "step": 777 }, { "epoch": 0.09, "learning_rate": 2.772796441531078e-07, "logits/chosen": -3.4541571140289307, "logits/rejected": -3.171201705932617, "logps/chosen": -265.7227478027344, "logps/rejected": -284.701171875, "loss": 0.4372, "rewards/accuracies": 0.75, "rewards/chosen": -0.10160556435585022, "rewards/margins": 1.1893612146377563, "rewards/rejected": -1.2909667491912842, "step": 778 }, { "epoch": 0.09, "learning_rate": 2.772445276834835e-07, "logits/chosen": -3.4178433418273926, "logits/rejected": -2.9332752227783203, "logps/chosen": -246.45094299316406, "logps/rejected": -228.9134521484375, "loss": 0.5666, "rewards/accuracies": 0.75, "rewards/chosen": -0.0016984790563583374, "rewards/margins": 0.6399407982826233, "rewards/rejected": -0.6416392922401428, "step": 779 }, { "epoch": 0.09, "learning_rate": 2.772094112138593e-07, "logits/chosen": -3.0161616802215576, "logits/rejected": -3.011470317840576, "logps/chosen": -86.36643981933594, "logps/rejected": -119.4278793334961, "loss": 0.6144, "rewards/accuracies": 0.625, "rewards/chosen": -0.04824692755937576, "rewards/margins": 0.3826948404312134, "rewards/rejected": -0.43094176054000854, "step": 780 }, { "epoch": 0.09, "learning_rate": 2.7717429474423507e-07, "logits/chosen": -3.042189598083496, "logits/rejected": -3.003848075866699, "logps/chosen": -192.06727600097656, "logps/rejected": -180.68637084960938, "loss": 0.4963, "rewards/accuracies": 0.875, "rewards/chosen": 0.08379801362752914, "rewards/margins": 0.6655247807502747, "rewards/rejected": -0.5817267894744873, "step": 781 }, { "epoch": 0.09, "learning_rate": 2.7713917827461077e-07, "logits/chosen": -2.302798271179199, "logits/rejected": -2.4597132205963135, "logps/chosen": -172.348388671875, "logps/rejected": -189.34625244140625, "loss": 0.6826, "rewards/accuracies": 0.875, "rewards/chosen": -0.2909846007823944, "rewards/margins": 0.4485825300216675, "rewards/rejected": -0.7395671606063843, "step": 782 }, { "epoch": 0.09, "learning_rate": 2.771040618049865e-07, "logits/chosen": -2.9049015045166016, "logits/rejected": -2.8664212226867676, "logps/chosen": -287.1170349121094, "logps/rejected": -319.918701171875, "loss": 0.357, "rewards/accuracies": 0.875, "rewards/chosen": 0.17138461768627167, "rewards/margins": 1.514103651046753, "rewards/rejected": -1.3427191972732544, "step": 783 }, { "epoch": 0.09, "learning_rate": 2.770689453353623e-07, "logits/chosen": -2.58640456199646, "logits/rejected": -2.814419746398926, "logps/chosen": -158.6343994140625, "logps/rejected": -145.18853759765625, "loss": 0.6517, "rewards/accuracies": 0.625, "rewards/chosen": -0.047204241156578064, "rewards/margins": 0.2903803288936615, "rewards/rejected": -0.33758458495140076, "step": 784 }, { "epoch": 0.09, "learning_rate": 2.7703382886573803e-07, "logits/chosen": -3.515599250793457, "logits/rejected": -3.826673984527588, "logps/chosen": -179.5092315673828, "logps/rejected": -316.6686706542969, "loss": 0.2825, "rewards/accuracies": 0.75, "rewards/chosen": 0.13998627662658691, "rewards/margins": 1.968587875366211, "rewards/rejected": -1.8286018371582031, "step": 785 }, { "epoch": 0.09, "learning_rate": 2.769987123961138e-07, "logits/chosen": -2.4906749725341797, "logits/rejected": -2.799945831298828, "logps/chosen": -432.70135498046875, "logps/rejected": -237.13162231445312, "loss": 0.3155, "rewards/accuracies": 0.875, "rewards/chosen": 0.4119276702404022, "rewards/margins": 1.6757333278656006, "rewards/rejected": -1.2638057470321655, "step": 786 }, { "epoch": 0.09, "learning_rate": 2.769635959264895e-07, "logits/chosen": -3.3350143432617188, "logits/rejected": -2.9567699432373047, "logps/chosen": -167.20494079589844, "logps/rejected": -120.77100372314453, "loss": 0.8804, "rewards/accuracies": 0.5, "rewards/chosen": 0.12553802132606506, "rewards/margins": 0.09677297621965408, "rewards/rejected": 0.02876507118344307, "step": 787 }, { "epoch": 0.09, "learning_rate": 2.7692847945686524e-07, "logits/chosen": -2.9183406829833984, "logits/rejected": -2.7444400787353516, "logps/chosen": -353.8623046875, "logps/rejected": -377.4620361328125, "loss": 0.4291, "rewards/accuracies": 0.75, "rewards/chosen": 0.07431145012378693, "rewards/margins": 1.0330679416656494, "rewards/rejected": -0.9587564468383789, "step": 788 }, { "epoch": 0.09, "learning_rate": 2.76893362987241e-07, "logits/chosen": -3.5005249977111816, "logits/rejected": -3.4073243141174316, "logps/chosen": -304.9727783203125, "logps/rejected": -259.5607604980469, "loss": 0.3121, "rewards/accuracies": 0.875, "rewards/chosen": 0.35121098160743713, "rewards/margins": 1.8979530334472656, "rewards/rejected": -1.5467422008514404, "step": 789 }, { "epoch": 0.09, "learning_rate": 2.7685824651761675e-07, "logits/chosen": -2.9321601390838623, "logits/rejected": -3.0256450176239014, "logps/chosen": -180.75299072265625, "logps/rejected": -232.77297973632812, "loss": 0.4668, "rewards/accuracies": 0.625, "rewards/chosen": 0.013980764895677567, "rewards/margins": 1.9283115863800049, "rewards/rejected": -1.9143307209014893, "step": 790 }, { "epoch": 0.09, "learning_rate": 2.768231300479925e-07, "logits/chosen": -2.7284204959869385, "logits/rejected": -2.7354629039764404, "logps/chosen": -211.25372314453125, "logps/rejected": -270.4598388671875, "loss": 0.4324, "rewards/accuracies": 0.75, "rewards/chosen": 0.17289772629737854, "rewards/margins": 0.8530900478363037, "rewards/rejected": -0.6801923513412476, "step": 791 }, { "epoch": 0.09, "learning_rate": 2.7678801357836826e-07, "logits/chosen": -2.7718188762664795, "logits/rejected": -2.828396797180176, "logps/chosen": -333.40509033203125, "logps/rejected": -246.76522827148438, "loss": 0.5144, "rewards/accuracies": 0.75, "rewards/chosen": 0.13157925009727478, "rewards/margins": 0.6368228793144226, "rewards/rejected": -0.5052436590194702, "step": 792 }, { "epoch": 0.09, "learning_rate": 2.76752897108744e-07, "logits/chosen": -3.1648764610290527, "logits/rejected": -3.386141777038574, "logps/chosen": -187.5034942626953, "logps/rejected": -299.50665283203125, "loss": 0.3324, "rewards/accuracies": 0.875, "rewards/chosen": -0.35229629278182983, "rewards/margins": 1.4573674201965332, "rewards/rejected": -1.8096636533737183, "step": 793 }, { "epoch": 0.09, "learning_rate": 2.7671778063911976e-07, "logits/chosen": -2.5413899421691895, "logits/rejected": -2.4490833282470703, "logps/chosen": -193.41082763671875, "logps/rejected": -264.7743225097656, "loss": 0.4126, "rewards/accuracies": 0.75, "rewards/chosen": 0.2964949607849121, "rewards/margins": 0.988365650177002, "rewards/rejected": -0.6918706893920898, "step": 794 }, { "epoch": 0.09, "learning_rate": 2.7668266416949546e-07, "logits/chosen": -3.1381118297576904, "logits/rejected": -3.390993595123291, "logps/chosen": -166.88821411132812, "logps/rejected": -233.92172241210938, "loss": 0.3307, "rewards/accuracies": 0.875, "rewards/chosen": 0.00010221544653177261, "rewards/margins": 1.4514660835266113, "rewards/rejected": -1.4513638019561768, "step": 795 }, { "epoch": 0.09, "learning_rate": 2.766475476998712e-07, "logits/chosen": -3.5331833362579346, "logits/rejected": -3.3989603519439697, "logps/chosen": -178.17892456054688, "logps/rejected": -175.97079467773438, "loss": 0.4038, "rewards/accuracies": 0.875, "rewards/chosen": 0.16475343704223633, "rewards/margins": 1.1457151174545288, "rewards/rejected": -0.9809616804122925, "step": 796 }, { "epoch": 0.09, "learning_rate": 2.7661243123024697e-07, "logits/chosen": -3.75699520111084, "logits/rejected": -3.80007266998291, "logps/chosen": -246.5064697265625, "logps/rejected": -291.75048828125, "loss": 0.4595, "rewards/accuracies": 0.875, "rewards/chosen": -0.10673819482326508, "rewards/margins": 0.8938522934913635, "rewards/rejected": -1.0005903244018555, "step": 797 }, { "epoch": 0.09, "learning_rate": 2.765773147606227e-07, "logits/chosen": -2.968410015106201, "logits/rejected": -3.3296732902526855, "logps/chosen": -205.11862182617188, "logps/rejected": -210.55772399902344, "loss": 0.3367, "rewards/accuracies": 0.875, "rewards/chosen": 0.09104548394680023, "rewards/margins": 1.931513786315918, "rewards/rejected": -1.840468406677246, "step": 798 }, { "epoch": 0.09, "learning_rate": 2.765421982909985e-07, "logits/chosen": -3.4944396018981934, "logits/rejected": -3.2029104232788086, "logps/chosen": -191.5533447265625, "logps/rejected": -205.69796752929688, "loss": 0.3785, "rewards/accuracies": 0.875, "rewards/chosen": -0.1512124389410019, "rewards/margins": 1.1898620128631592, "rewards/rejected": -1.3410743474960327, "step": 799 }, { "epoch": 0.09, "learning_rate": 2.765070818213742e-07, "logits/chosen": -3.664780616760254, "logits/rejected": -3.68318510055542, "logps/chosen": -230.27467346191406, "logps/rejected": -275.4501037597656, "loss": 0.2577, "rewards/accuracies": 1.0, "rewards/chosen": 0.15244176983833313, "rewards/margins": 2.1232802867889404, "rewards/rejected": -1.9708384275436401, "step": 800 }, { "epoch": 0.09, "learning_rate": 2.7647196535174993e-07, "logits/chosen": -3.46421480178833, "logits/rejected": -3.676669120788574, "logps/chosen": -209.22488403320312, "logps/rejected": -247.3558349609375, "loss": 0.4863, "rewards/accuracies": 0.625, "rewards/chosen": 0.1414693295955658, "rewards/margins": 1.1148982048034668, "rewards/rejected": -0.9734289050102234, "step": 801 }, { "epoch": 0.09, "learning_rate": 2.7643684888212574e-07, "logits/chosen": -3.051532745361328, "logits/rejected": -2.6125741004943848, "logps/chosen": -296.45037841796875, "logps/rejected": -178.32009887695312, "loss": 0.4827, "rewards/accuracies": 0.75, "rewards/chosen": 0.17951726913452148, "rewards/margins": 0.6749148368835449, "rewards/rejected": -0.49539750814437866, "step": 802 }, { "epoch": 0.09, "learning_rate": 2.7640173241250144e-07, "logits/chosen": -3.62080717086792, "logits/rejected": -3.388254404067993, "logps/chosen": -241.11851501464844, "logps/rejected": -130.45437622070312, "loss": 0.6223, "rewards/accuracies": 0.75, "rewards/chosen": 0.14625240862369537, "rewards/margins": 0.4451746940612793, "rewards/rejected": -0.29892224073410034, "step": 803 }, { "epoch": 0.09, "learning_rate": 2.763666159428772e-07, "logits/chosen": -3.0720181465148926, "logits/rejected": -3.234790802001953, "logps/chosen": -272.6170349121094, "logps/rejected": -239.00579833984375, "loss": 0.6243, "rewards/accuracies": 0.625, "rewards/chosen": -0.4109915494918823, "rewards/margins": 0.5064557790756226, "rewards/rejected": -0.9174473881721497, "step": 804 }, { "epoch": 0.09, "learning_rate": 2.7633149947325295e-07, "logits/chosen": -2.8254270553588867, "logits/rejected": -2.8319807052612305, "logps/chosen": -528.080810546875, "logps/rejected": -239.9102783203125, "loss": 0.4123, "rewards/accuracies": 0.875, "rewards/chosen": 0.0475018247961998, "rewards/margins": 1.0276113748550415, "rewards/rejected": -0.9801095724105835, "step": 805 }, { "epoch": 0.09, "learning_rate": 2.762963830036287e-07, "logits/chosen": -3.577845335006714, "logits/rejected": -3.316768169403076, "logps/chosen": -264.29925537109375, "logps/rejected": -248.85610961914062, "loss": 0.5777, "rewards/accuracies": 0.625, "rewards/chosen": 0.012349037453532219, "rewards/margins": 0.6933183670043945, "rewards/rejected": -0.6809692978858948, "step": 806 }, { "epoch": 0.09, "learning_rate": 2.7626126653400446e-07, "logits/chosen": -2.477569103240967, "logits/rejected": -2.512051582336426, "logps/chosen": -417.02008056640625, "logps/rejected": -274.2406005859375, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": 0.21478520333766937, "rewards/margins": 1.5594470500946045, "rewards/rejected": -1.344661831855774, "step": 807 }, { "epoch": 0.09, "learning_rate": 2.7622615006438016e-07, "logits/chosen": -3.7279951572418213, "logits/rejected": -3.8948373794555664, "logps/chosen": -129.264892578125, "logps/rejected": -267.4670104980469, "loss": 0.507, "rewards/accuracies": 0.75, "rewards/chosen": -0.2151148021221161, "rewards/margins": 0.9731276631355286, "rewards/rejected": -1.1882424354553223, "step": 808 }, { "epoch": 0.09, "learning_rate": 2.761910335947559e-07, "logits/chosen": -2.6491005420684814, "logits/rejected": -2.611477851867676, "logps/chosen": -612.55419921875, "logps/rejected": -386.42523193359375, "loss": 0.2611, "rewards/accuracies": 1.0, "rewards/chosen": 0.08794021606445312, "rewards/margins": 1.4910738468170166, "rewards/rejected": -1.4031336307525635, "step": 809 }, { "epoch": 0.09, "learning_rate": 2.7615591712513167e-07, "logits/chosen": -3.9967260360717773, "logits/rejected": -3.5841431617736816, "logps/chosen": -333.91021728515625, "logps/rejected": -238.244873046875, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": -0.3397977352142334, "rewards/margins": 1.2786295413970947, "rewards/rejected": -1.6184272766113281, "step": 810 }, { "epoch": 0.09, "learning_rate": 2.761208006555074e-07, "logits/chosen": -2.7773990631103516, "logits/rejected": -2.9145822525024414, "logps/chosen": -298.7373046875, "logps/rejected": -240.71343994140625, "loss": 0.4287, "rewards/accuracies": 0.875, "rewards/chosen": 0.04489287734031677, "rewards/margins": 0.9263416528701782, "rewards/rejected": -0.8814487457275391, "step": 811 }, { "epoch": 0.09, "learning_rate": 2.7608568418588317e-07, "logits/chosen": -2.893343448638916, "logits/rejected": -2.9592838287353516, "logps/chosen": -276.739013671875, "logps/rejected": -235.43222045898438, "loss": 0.3445, "rewards/accuracies": 1.0, "rewards/chosen": 0.05172871798276901, "rewards/margins": 1.1782557964324951, "rewards/rejected": -1.126527190208435, "step": 812 }, { "epoch": 0.09, "learning_rate": 2.760505677162589e-07, "logits/chosen": -3.150068759918213, "logits/rejected": -3.132845401763916, "logps/chosen": -199.677001953125, "logps/rejected": -185.10208129882812, "loss": 0.4059, "rewards/accuracies": 0.75, "rewards/chosen": -0.07125253975391388, "rewards/margins": 1.0133061408996582, "rewards/rejected": -1.0845587253570557, "step": 813 }, { "epoch": 0.09, "learning_rate": 2.760154512466347e-07, "logits/chosen": -2.9751601219177246, "logits/rejected": -3.251437187194824, "logps/chosen": -136.62762451171875, "logps/rejected": -210.20529174804688, "loss": 0.4626, "rewards/accuracies": 0.875, "rewards/chosen": 0.09316956251859665, "rewards/margins": 1.0490405559539795, "rewards/rejected": -0.9558709859848022, "step": 814 }, { "epoch": 0.09, "learning_rate": 2.7598033477701044e-07, "logits/chosen": -2.8333122730255127, "logits/rejected": -2.6702632904052734, "logps/chosen": -253.98123168945312, "logps/rejected": -329.1234130859375, "loss": 0.6227, "rewards/accuracies": 0.75, "rewards/chosen": 0.02935744822025299, "rewards/margins": 0.8620047569274902, "rewards/rejected": -0.8326473832130432, "step": 815 }, { "epoch": 0.09, "learning_rate": 2.7594521830738614e-07, "logits/chosen": -3.494313955307007, "logits/rejected": -3.568282127380371, "logps/chosen": -188.5488739013672, "logps/rejected": -340.9733581542969, "loss": 0.7583, "rewards/accuracies": 0.5, "rewards/chosen": -0.7036993503570557, "rewards/margins": -0.027393028140068054, "rewards/rejected": -0.6763062477111816, "step": 816 }, { "epoch": 0.09, "learning_rate": 2.759101018377619e-07, "logits/chosen": -3.356037139892578, "logits/rejected": -3.520613431930542, "logps/chosen": -255.93846130371094, "logps/rejected": -216.89398193359375, "loss": 0.349, "rewards/accuracies": 0.875, "rewards/chosen": 0.1564173698425293, "rewards/margins": 1.223031759262085, "rewards/rejected": -1.0666143894195557, "step": 817 }, { "epoch": 0.09, "learning_rate": 2.7587498536813764e-07, "logits/chosen": -2.651693105697632, "logits/rejected": -2.681769847869873, "logps/chosen": -238.81459045410156, "logps/rejected": -188.87985229492188, "loss": 0.7093, "rewards/accuracies": 0.625, "rewards/chosen": -0.2916595935821533, "rewards/margins": 0.1870458424091339, "rewards/rejected": -0.478705495595932, "step": 818 }, { "epoch": 0.09, "learning_rate": 2.758398688985134e-07, "logits/chosen": -3.432954788208008, "logits/rejected": -3.379977226257324, "logps/chosen": -304.18353271484375, "logps/rejected": -181.3824005126953, "loss": 0.3633, "rewards/accuracies": 0.875, "rewards/chosen": 0.45107027888298035, "rewards/margins": 1.181410551071167, "rewards/rejected": -0.730340301990509, "step": 819 }, { "epoch": 0.09, "learning_rate": 2.7580475242888915e-07, "logits/chosen": -3.4822726249694824, "logits/rejected": -3.2820496559143066, "logps/chosen": -168.00473022460938, "logps/rejected": -212.29330444335938, "loss": 0.3012, "rewards/accuracies": 0.875, "rewards/chosen": 0.3096015155315399, "rewards/margins": 1.4323790073394775, "rewards/rejected": -1.1227775812149048, "step": 820 }, { "epoch": 0.09, "learning_rate": 2.7576963595926485e-07, "logits/chosen": -2.952803373336792, "logits/rejected": -3.2074875831604004, "logps/chosen": -200.18057250976562, "logps/rejected": -173.583984375, "loss": 0.5475, "rewards/accuracies": 0.75, "rewards/chosen": -0.14085188508033752, "rewards/margins": 0.47980016469955444, "rewards/rejected": -0.6206520795822144, "step": 821 }, { "epoch": 0.09, "learning_rate": 2.757345194896406e-07, "logits/chosen": -4.037409782409668, "logits/rejected": -3.581698417663574, "logps/chosen": -215.97024536132812, "logps/rejected": -261.63427734375, "loss": 0.6184, "rewards/accuracies": 0.875, "rewards/chosen": -0.34472501277923584, "rewards/margins": 0.3228113055229187, "rewards/rejected": -0.6675362586975098, "step": 822 }, { "epoch": 0.09, "learning_rate": 2.7569940302001636e-07, "logits/chosen": -3.261975049972534, "logits/rejected": -3.4368786811828613, "logps/chosen": -180.62832641601562, "logps/rejected": -250.05682373046875, "loss": 0.478, "rewards/accuracies": 0.875, "rewards/chosen": 0.21257230639457703, "rewards/margins": 1.1826269626617432, "rewards/rejected": -0.970054566860199, "step": 823 }, { "epoch": 0.09, "learning_rate": 2.756642865503921e-07, "logits/chosen": -3.1896753311157227, "logits/rejected": -2.911609172821045, "logps/chosen": -180.71978759765625, "logps/rejected": -230.10848999023438, "loss": 0.3973, "rewards/accuracies": 0.875, "rewards/chosen": -0.03788638114929199, "rewards/margins": 1.1634998321533203, "rewards/rejected": -1.2013862133026123, "step": 824 }, { "epoch": 0.1, "learning_rate": 2.7562917008076787e-07, "logits/chosen": -2.711021900177002, "logits/rejected": -2.6782000064849854, "logps/chosen": -178.26597595214844, "logps/rejected": -254.74911499023438, "loss": 0.3755, "rewards/accuracies": 0.875, "rewards/chosen": -0.09842272102832794, "rewards/margins": 1.270041584968567, "rewards/rejected": -1.3684642314910889, "step": 825 }, { "epoch": 0.1, "learning_rate": 2.755940536111436e-07, "logits/chosen": -2.9815597534179688, "logits/rejected": -3.1267921924591064, "logps/chosen": -253.5839080810547, "logps/rejected": -224.4720458984375, "loss": 0.5343, "rewards/accuracies": 0.625, "rewards/chosen": -0.1619645357131958, "rewards/margins": 0.6192668080329895, "rewards/rejected": -0.7812313437461853, "step": 826 }, { "epoch": 0.1, "learning_rate": 2.755589371415194e-07, "logits/chosen": -3.3388805389404297, "logits/rejected": -3.301466464996338, "logps/chosen": -185.37667846679688, "logps/rejected": -427.2044677734375, "loss": 0.726, "rewards/accuracies": 0.5, "rewards/chosen": -0.13455341756343842, "rewards/margins": 0.20535269379615784, "rewards/rejected": -0.33990606665611267, "step": 827 }, { "epoch": 0.1, "learning_rate": 2.7552382067189513e-07, "logits/chosen": -1.9774956703186035, "logits/rejected": -1.982587456703186, "logps/chosen": -225.5425567626953, "logps/rejected": -163.74658203125, "loss": 0.5317, "rewards/accuracies": 0.625, "rewards/chosen": 0.3739159405231476, "rewards/margins": 0.5552279949188232, "rewards/rejected": -0.18131211400032043, "step": 828 }, { "epoch": 0.1, "learning_rate": 2.7548870420227083e-07, "logits/chosen": -2.386810302734375, "logits/rejected": -2.4659695625305176, "logps/chosen": -403.2242431640625, "logps/rejected": -334.8184814453125, "loss": 0.6501, "rewards/accuracies": 0.75, "rewards/chosen": 0.06281670928001404, "rewards/margins": 0.6998395323753357, "rewards/rejected": -0.637022852897644, "step": 829 }, { "epoch": 0.1, "learning_rate": 2.754535877326466e-07, "logits/chosen": -3.265871047973633, "logits/rejected": -3.0587384700775146, "logps/chosen": -373.1357727050781, "logps/rejected": -258.71319580078125, "loss": 0.7587, "rewards/accuracies": 0.625, "rewards/chosen": -0.2828088700771332, "rewards/margins": 0.06885270774364471, "rewards/rejected": -0.3516615927219391, "step": 830 }, { "epoch": 0.1, "learning_rate": 2.7541847126302234e-07, "logits/chosen": -3.139171838760376, "logits/rejected": -2.7939915657043457, "logps/chosen": -218.7184295654297, "logps/rejected": -242.91561889648438, "loss": 0.8487, "rewards/accuracies": 0.75, "rewards/chosen": -0.45756590366363525, "rewards/margins": 1.143376350402832, "rewards/rejected": -1.6009422540664673, "step": 831 }, { "epoch": 0.1, "learning_rate": 2.753833547933981e-07, "logits/chosen": -3.090639591217041, "logits/rejected": -3.123727321624756, "logps/chosen": -301.5442199707031, "logps/rejected": -285.9490661621094, "loss": 0.578, "rewards/accuracies": 0.5, "rewards/chosen": -0.04124303162097931, "rewards/margins": 0.6752254366874695, "rewards/rejected": -0.7164684534072876, "step": 832 }, { "epoch": 0.1, "learning_rate": 2.7534823832377385e-07, "logits/chosen": -3.0568671226501465, "logits/rejected": -3.0110726356506348, "logps/chosen": -183.74142456054688, "logps/rejected": -130.54478454589844, "loss": 0.5414, "rewards/accuracies": 0.75, "rewards/chosen": 0.14824596047401428, "rewards/margins": 0.8876832723617554, "rewards/rejected": -0.7394372820854187, "step": 833 }, { "epoch": 0.1, "learning_rate": 2.753131218541496e-07, "logits/chosen": -2.9822592735290527, "logits/rejected": -2.7962350845336914, "logps/chosen": -286.7266845703125, "logps/rejected": -265.45428466796875, "loss": 0.4306, "rewards/accuracies": 0.75, "rewards/chosen": 0.18657168745994568, "rewards/margins": 1.0591834783554077, "rewards/rejected": -0.8726117610931396, "step": 834 }, { "epoch": 0.1, "learning_rate": 2.752780053845253e-07, "logits/chosen": -3.128429651260376, "logits/rejected": -2.7803685665130615, "logps/chosen": -256.7389831542969, "logps/rejected": -218.9199676513672, "loss": 0.6355, "rewards/accuracies": 0.5, "rewards/chosen": -0.1512812227010727, "rewards/margins": 0.4312949478626251, "rewards/rejected": -0.5825761556625366, "step": 835 }, { "epoch": 0.1, "learning_rate": 2.752428889149011e-07, "logits/chosen": -2.8643195629119873, "logits/rejected": -2.8368594646453857, "logps/chosen": -246.2068634033203, "logps/rejected": -355.43646240234375, "loss": 0.4653, "rewards/accuracies": 0.75, "rewards/chosen": -0.08422783017158508, "rewards/margins": 1.0881870985031128, "rewards/rejected": -1.1724148988723755, "step": 836 }, { "epoch": 0.1, "learning_rate": 2.752077724452768e-07, "logits/chosen": -3.3321077823638916, "logits/rejected": -3.1884727478027344, "logps/chosen": -169.97579956054688, "logps/rejected": -481.00616455078125, "loss": 0.2769, "rewards/accuracies": 0.875, "rewards/chosen": -0.13322196900844574, "rewards/margins": 2.267456531524658, "rewards/rejected": -2.4006783962249756, "step": 837 }, { "epoch": 0.1, "learning_rate": 2.7517265597565256e-07, "logits/chosen": -3.2844486236572266, "logits/rejected": -3.2791526317596436, "logps/chosen": -167.28054809570312, "logps/rejected": -237.37396240234375, "loss": 0.5303, "rewards/accuracies": 0.625, "rewards/chosen": 0.38232889771461487, "rewards/margins": 1.035386562347412, "rewards/rejected": -0.6530575752258301, "step": 838 }, { "epoch": 0.1, "learning_rate": 2.751375395060283e-07, "logits/chosen": -2.8910608291625977, "logits/rejected": -2.490525722503662, "logps/chosen": -239.01239013671875, "logps/rejected": -161.4678192138672, "loss": 0.5792, "rewards/accuracies": 0.75, "rewards/chosen": -0.18761992454528809, "rewards/margins": 0.32470637559890747, "rewards/rejected": -0.5123263001441956, "step": 839 }, { "epoch": 0.1, "learning_rate": 2.7510242303640407e-07, "logits/chosen": -3.2921488285064697, "logits/rejected": -3.3097188472747803, "logps/chosen": -295.316650390625, "logps/rejected": -115.58242797851562, "loss": 0.4937, "rewards/accuracies": 0.875, "rewards/chosen": -0.04165259748697281, "rewards/margins": 0.5823967456817627, "rewards/rejected": -0.6240493655204773, "step": 840 }, { "epoch": 0.1, "learning_rate": 2.750673065667798e-07, "logits/chosen": -2.654162645339966, "logits/rejected": -2.853571891784668, "logps/chosen": -246.6444091796875, "logps/rejected": -366.7325134277344, "loss": 0.5326, "rewards/accuracies": 0.75, "rewards/chosen": -0.4231809377670288, "rewards/margins": 0.7851053476333618, "rewards/rejected": -1.2082862854003906, "step": 841 }, { "epoch": 0.1, "learning_rate": 2.750321900971556e-07, "logits/chosen": -3.2783055305480957, "logits/rejected": -3.299618721008301, "logps/chosen": -117.82460021972656, "logps/rejected": -244.18411254882812, "loss": 0.4121, "rewards/accuracies": 0.875, "rewards/chosen": 0.24455642700195312, "rewards/margins": 1.2014939785003662, "rewards/rejected": -0.9569374918937683, "step": 842 }, { "epoch": 0.1, "learning_rate": 2.749970736275313e-07, "logits/chosen": -2.9774930477142334, "logits/rejected": -2.6807808876037598, "logps/chosen": -351.3978576660156, "logps/rejected": -279.03704833984375, "loss": 0.8492, "rewards/accuracies": 0.5, "rewards/chosen": -0.23986348509788513, "rewards/margins": -0.16272865235805511, "rewards/rejected": -0.07713484764099121, "step": 843 }, { "epoch": 0.1, "learning_rate": 2.7496195715790703e-07, "logits/chosen": -2.1806719303131104, "logits/rejected": -2.1184065341949463, "logps/chosen": -358.91851806640625, "logps/rejected": -235.81985473632812, "loss": 0.4973, "rewards/accuracies": 0.625, "rewards/chosen": 0.027127843350172043, "rewards/margins": 0.9274944067001343, "rewards/rejected": -0.9003665447235107, "step": 844 }, { "epoch": 0.1, "learning_rate": 2.749268406882828e-07, "logits/chosen": -2.5226049423217773, "logits/rejected": -2.5325746536254883, "logps/chosen": -367.41192626953125, "logps/rejected": -411.62506103515625, "loss": 0.4258, "rewards/accuracies": 0.875, "rewards/chosen": 0.27732980251312256, "rewards/margins": 1.2960268259048462, "rewards/rejected": -1.018696904182434, "step": 845 }, { "epoch": 0.1, "learning_rate": 2.7489172421865854e-07, "logits/chosen": -4.083926200866699, "logits/rejected": -3.8236048221588135, "logps/chosen": -106.34597778320312, "logps/rejected": -89.45291137695312, "loss": 0.7906, "rewards/accuracies": 0.375, "rewards/chosen": -0.29099392890930176, "rewards/margins": -0.10117463767528534, "rewards/rejected": -0.18981926143169403, "step": 846 }, { "epoch": 0.1, "learning_rate": 2.748566077490343e-07, "logits/chosen": -3.0409655570983887, "logits/rejected": -3.036008358001709, "logps/chosen": -146.0890350341797, "logps/rejected": -230.4691925048828, "loss": 0.4155, "rewards/accuracies": 0.625, "rewards/chosen": 0.3017933666706085, "rewards/margins": 1.3907811641693115, "rewards/rejected": -1.0889878273010254, "step": 847 }, { "epoch": 0.1, "learning_rate": 2.7482149127941005e-07, "logits/chosen": -3.3585784435272217, "logits/rejected": -3.127389907836914, "logps/chosen": -310.48931884765625, "logps/rejected": -283.6368408203125, "loss": 0.5293, "rewards/accuracies": 0.625, "rewards/chosen": -0.39480113983154297, "rewards/margins": 0.7650704383850098, "rewards/rejected": -1.1598716974258423, "step": 848 }, { "epoch": 0.1, "learning_rate": 2.747863748097858e-07, "logits/chosen": -2.850364923477173, "logits/rejected": -2.8961315155029297, "logps/chosen": -255.3311309814453, "logps/rejected": -333.34375, "loss": 0.933, "rewards/accuracies": 0.5, "rewards/chosen": -0.19613291323184967, "rewards/margins": 0.05653604865074158, "rewards/rejected": -0.25266897678375244, "step": 849 }, { "epoch": 0.1, "learning_rate": 2.747512583401615e-07, "logits/chosen": -2.9712603092193604, "logits/rejected": -3.238640546798706, "logps/chosen": -202.01333618164062, "logps/rejected": -219.466796875, "loss": 0.7301, "rewards/accuracies": 0.5, "rewards/chosen": -0.2153870016336441, "rewards/margins": 0.3994392156600952, "rewards/rejected": -0.6148262619972229, "step": 850 }, { "epoch": 0.1, "learning_rate": 2.7471614187053726e-07, "logits/chosen": -3.9397530555725098, "logits/rejected": -3.958695650100708, "logps/chosen": -234.10787963867188, "logps/rejected": -176.3972930908203, "loss": 0.2293, "rewards/accuracies": 1.0, "rewards/chosen": -0.007220447063446045, "rewards/margins": 1.64689302444458, "rewards/rejected": -1.6541134119033813, "step": 851 }, { "epoch": 0.1, "learning_rate": 2.74681025400913e-07, "logits/chosen": -3.089216709136963, "logits/rejected": -3.2197213172912598, "logps/chosen": -311.3764343261719, "logps/rejected": -236.69052124023438, "loss": 0.5464, "rewards/accuracies": 0.625, "rewards/chosen": -0.2225361466407776, "rewards/margins": 0.6139265894889832, "rewards/rejected": -0.8364627361297607, "step": 852 }, { "epoch": 0.1, "learning_rate": 2.7464590893128876e-07, "logits/chosen": -3.5604469776153564, "logits/rejected": -3.5403032302856445, "logps/chosen": -192.53421020507812, "logps/rejected": -211.67202758789062, "loss": 0.3748, "rewards/accuracies": 0.875, "rewards/chosen": 0.1529182344675064, "rewards/margins": 1.1420307159423828, "rewards/rejected": -0.9891124963760376, "step": 853 }, { "epoch": 0.1, "learning_rate": 2.746107924616645e-07, "logits/chosen": -2.6424708366394043, "logits/rejected": -2.614380359649658, "logps/chosen": -266.469482421875, "logps/rejected": -240.80807495117188, "loss": 0.4944, "rewards/accuracies": 0.875, "rewards/chosen": -0.21823687851428986, "rewards/margins": 0.7156004905700684, "rewards/rejected": -0.9338374137878418, "step": 854 }, { "epoch": 0.1, "learning_rate": 2.7457567599204027e-07, "logits/chosen": -3.251129150390625, "logits/rejected": -3.387636423110962, "logps/chosen": -175.33871459960938, "logps/rejected": -150.60842895507812, "loss": 0.5048, "rewards/accuracies": 0.75, "rewards/chosen": -0.012763511389493942, "rewards/margins": 1.0121045112609863, "rewards/rejected": -1.0248678922653198, "step": 855 }, { "epoch": 0.1, "learning_rate": 2.7454055952241597e-07, "logits/chosen": -2.779937744140625, "logits/rejected": -2.8106565475463867, "logps/chosen": -113.65443420410156, "logps/rejected": -220.91098022460938, "loss": 0.464, "rewards/accuracies": 0.75, "rewards/chosen": -0.08553160727024078, "rewards/margins": 1.1640958786010742, "rewards/rejected": -1.2496274709701538, "step": 856 }, { "epoch": 0.1, "learning_rate": 2.745054430527917e-07, "logits/chosen": -2.8897969722747803, "logits/rejected": -2.5848641395568848, "logps/chosen": -314.1836242675781, "logps/rejected": -306.003173828125, "loss": 0.3693, "rewards/accuracies": 0.875, "rewards/chosen": 0.28883516788482666, "rewards/margins": 1.6706345081329346, "rewards/rejected": -1.3817994594573975, "step": 857 }, { "epoch": 0.1, "learning_rate": 2.744703265831675e-07, "logits/chosen": -3.357424736022949, "logits/rejected": -3.068399667739868, "logps/chosen": -221.11459350585938, "logps/rejected": -202.184326171875, "loss": 0.3669, "rewards/accuracies": 0.75, "rewards/chosen": 0.06499813497066498, "rewards/margins": 1.5424292087554932, "rewards/rejected": -1.4774311780929565, "step": 858 }, { "epoch": 0.1, "learning_rate": 2.7443521011354323e-07, "logits/chosen": -2.8036627769470215, "logits/rejected": -3.0134940147399902, "logps/chosen": -296.6165466308594, "logps/rejected": -218.00172424316406, "loss": 0.4799, "rewards/accuracies": 0.75, "rewards/chosen": 0.07972222566604614, "rewards/margins": 0.7851074934005737, "rewards/rejected": -0.7053853273391724, "step": 859 }, { "epoch": 0.1, "learning_rate": 2.74400093643919e-07, "logits/chosen": -3.318068265914917, "logits/rejected": -3.22570538520813, "logps/chosen": -304.4339599609375, "logps/rejected": -217.77716064453125, "loss": 0.4289, "rewards/accuracies": 0.75, "rewards/chosen": -0.3415123224258423, "rewards/margins": 0.7825182676315308, "rewards/rejected": -1.124030590057373, "step": 860 }, { "epoch": 0.1, "learning_rate": 2.7436497717429474e-07, "logits/chosen": -2.879946708679199, "logits/rejected": -2.668354034423828, "logps/chosen": -247.2313232421875, "logps/rejected": -378.1593017578125, "loss": 0.4276, "rewards/accuracies": 0.75, "rewards/chosen": -0.41900256276130676, "rewards/margins": 0.9543185830116272, "rewards/rejected": -1.3733210563659668, "step": 861 }, { "epoch": 0.1, "learning_rate": 2.743298607046705e-07, "logits/chosen": -3.0926265716552734, "logits/rejected": -3.1871871948242188, "logps/chosen": -268.2755126953125, "logps/rejected": -205.275146484375, "loss": 0.4498, "rewards/accuracies": 0.875, "rewards/chosen": 0.2651245594024658, "rewards/margins": 0.7635625600814819, "rewards/rejected": -0.49843794107437134, "step": 862 }, { "epoch": 0.1, "learning_rate": 2.7429474423504625e-07, "logits/chosen": -2.900203227996826, "logits/rejected": -3.3673105239868164, "logps/chosen": -234.1897430419922, "logps/rejected": -260.4333190917969, "loss": 0.3427, "rewards/accuracies": 0.875, "rewards/chosen": 0.29174286127090454, "rewards/margins": 2.3300509452819824, "rewards/rejected": -2.0383083820343018, "step": 863 }, { "epoch": 0.1, "learning_rate": 2.7425962776542195e-07, "logits/chosen": -2.6148953437805176, "logits/rejected": -2.6465742588043213, "logps/chosen": -392.43109130859375, "logps/rejected": -368.469970703125, "loss": 0.4, "rewards/accuracies": 0.875, "rewards/chosen": 0.09190759807825089, "rewards/margins": 1.0077214241027832, "rewards/rejected": -0.9158138036727905, "step": 864 }, { "epoch": 0.1, "learning_rate": 2.742245112957977e-07, "logits/chosen": -3.818584680557251, "logits/rejected": -3.352694511413574, "logps/chosen": -294.89739990234375, "logps/rejected": -255.57516479492188, "loss": 0.3533, "rewards/accuracies": 0.875, "rewards/chosen": -0.1025727316737175, "rewards/margins": 1.863452434539795, "rewards/rejected": -1.9660251140594482, "step": 865 }, { "epoch": 0.1, "learning_rate": 2.7418939482617346e-07, "logits/chosen": -3.4445462226867676, "logits/rejected": -2.934802532196045, "logps/chosen": -610.8291015625, "logps/rejected": -206.37393188476562, "loss": 0.4809, "rewards/accuracies": 0.75, "rewards/chosen": -0.3013654351234436, "rewards/margins": 0.7586274147033691, "rewards/rejected": -1.059992790222168, "step": 866 }, { "epoch": 0.1, "learning_rate": 2.741542783565492e-07, "logits/chosen": -3.7697229385375977, "logits/rejected": -3.583829402923584, "logps/chosen": -315.19659423828125, "logps/rejected": -248.25814819335938, "loss": 0.8446, "rewards/accuracies": 0.375, "rewards/chosen": -0.4225854277610779, "rewards/margins": 0.012507237493991852, "rewards/rejected": -0.43509265780448914, "step": 867 }, { "epoch": 0.1, "learning_rate": 2.7411916188692497e-07, "logits/chosen": -3.731266975402832, "logits/rejected": -3.948967456817627, "logps/chosen": -89.87312316894531, "logps/rejected": -165.8406524658203, "loss": 0.5956, "rewards/accuracies": 0.5, "rewards/chosen": -0.06773865222930908, "rewards/margins": 0.342643141746521, "rewards/rejected": -0.41038182377815247, "step": 868 }, { "epoch": 0.1, "learning_rate": 2.7408404541730067e-07, "logits/chosen": -2.8064169883728027, "logits/rejected": -2.845860242843628, "logps/chosen": -443.707763671875, "logps/rejected": -338.8929748535156, "loss": 0.59, "rewards/accuracies": 0.75, "rewards/chosen": -0.14651471376419067, "rewards/margins": 0.48660218715667725, "rewards/rejected": -0.6331169009208679, "step": 869 }, { "epoch": 0.1, "learning_rate": 2.740489289476765e-07, "logits/chosen": -2.7667996883392334, "logits/rejected": -3.015601873397827, "logps/chosen": -425.88690185546875, "logps/rejected": -338.3902893066406, "loss": 0.5371, "rewards/accuracies": 0.625, "rewards/chosen": -0.13016009330749512, "rewards/margins": 0.8180822134017944, "rewards/rejected": -0.9482423067092896, "step": 870 }, { "epoch": 0.1, "learning_rate": 2.7401381247805223e-07, "logits/chosen": -2.478245973587036, "logits/rejected": -2.573784828186035, "logps/chosen": -361.21142578125, "logps/rejected": -276.9137878417969, "loss": 0.4985, "rewards/accuracies": 0.875, "rewards/chosen": 0.12616321444511414, "rewards/margins": 0.7055253386497498, "rewards/rejected": -0.579362154006958, "step": 871 }, { "epoch": 0.1, "learning_rate": 2.7397869600842793e-07, "logits/chosen": -2.9991977214813232, "logits/rejected": -3.1077451705932617, "logps/chosen": -291.0771484375, "logps/rejected": -259.90863037109375, "loss": 0.5261, "rewards/accuracies": 0.75, "rewards/chosen": 0.08637861907482147, "rewards/margins": 0.5141648054122925, "rewards/rejected": -0.4277861714363098, "step": 872 }, { "epoch": 0.1, "learning_rate": 2.739435795388037e-07, "logits/chosen": -3.3164827823638916, "logits/rejected": -3.3662006855010986, "logps/chosen": -166.0374298095703, "logps/rejected": -248.99966430664062, "loss": 0.7578, "rewards/accuracies": 0.5, "rewards/chosen": -0.12946414947509766, "rewards/margins": -0.05476903170347214, "rewards/rejected": -0.07469511032104492, "step": 873 }, { "epoch": 0.1, "learning_rate": 2.7390846306917944e-07, "logits/chosen": -3.2020163536071777, "logits/rejected": -3.1777071952819824, "logps/chosen": -154.885498046875, "logps/rejected": -153.17678833007812, "loss": 0.9752, "rewards/accuracies": 0.25, "rewards/chosen": -0.11419603228569031, "rewards/margins": -0.42291784286499023, "rewards/rejected": 0.3087218403816223, "step": 874 }, { "epoch": 0.1, "learning_rate": 2.738733465995552e-07, "logits/chosen": -3.5250675678253174, "logits/rejected": -3.100283622741699, "logps/chosen": -140.64231872558594, "logps/rejected": -166.44606018066406, "loss": 0.3511, "rewards/accuracies": 0.875, "rewards/chosen": 0.19421644508838654, "rewards/margins": 1.327646255493164, "rewards/rejected": -1.133429765701294, "step": 875 }, { "epoch": 0.1, "learning_rate": 2.7383823012993094e-07, "logits/chosen": -3.2447712421417236, "logits/rejected": -3.0207910537719727, "logps/chosen": -496.02557373046875, "logps/rejected": -273.6380310058594, "loss": 0.3026, "rewards/accuracies": 0.875, "rewards/chosen": 0.4117898941040039, "rewards/margins": 1.565908432006836, "rewards/rejected": -1.154118537902832, "step": 876 }, { "epoch": 0.1, "learning_rate": 2.7380311366030664e-07, "logits/chosen": -3.3602166175842285, "logits/rejected": -3.648688793182373, "logps/chosen": -256.692626953125, "logps/rejected": -211.4127655029297, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.3914310038089752, "rewards/margins": 0.5668737888336182, "rewards/rejected": -0.958304762840271, "step": 877 }, { "epoch": 0.1, "learning_rate": 2.737679971906824e-07, "logits/chosen": -3.647803783416748, "logits/rejected": -3.6295180320739746, "logps/chosen": -208.53182983398438, "logps/rejected": -188.347900390625, "loss": 0.2912, "rewards/accuracies": 0.875, "rewards/chosen": 0.13901200890541077, "rewards/margins": 1.7613424062728882, "rewards/rejected": -1.6223305463790894, "step": 878 }, { "epoch": 0.1, "learning_rate": 2.737328807210582e-07, "logits/chosen": -2.697908878326416, "logits/rejected": -2.2430458068847656, "logps/chosen": -354.37591552734375, "logps/rejected": -455.57562255859375, "loss": 0.5494, "rewards/accuracies": 0.625, "rewards/chosen": 0.15349599719047546, "rewards/margins": 1.0928208827972412, "rewards/rejected": -0.9393249750137329, "step": 879 }, { "epoch": 0.1, "learning_rate": 2.736977642514339e-07, "logits/chosen": -3.46490478515625, "logits/rejected": -3.2499845027923584, "logps/chosen": -328.3086853027344, "logps/rejected": -312.980712890625, "loss": 0.3316, "rewards/accuracies": 0.875, "rewards/chosen": 0.37826910614967346, "rewards/margins": 1.6406892538070679, "rewards/rejected": -1.2624201774597168, "step": 880 }, { "epoch": 0.1, "learning_rate": 2.7366264778180966e-07, "logits/chosen": -2.9198622703552246, "logits/rejected": -2.7697479724884033, "logps/chosen": -210.04257202148438, "logps/rejected": -166.7701873779297, "loss": 0.582, "rewards/accuracies": 0.625, "rewards/chosen": 0.05798802897334099, "rewards/margins": 0.3633895814418793, "rewards/rejected": -0.3054015636444092, "step": 881 }, { "epoch": 0.1, "learning_rate": 2.736275313121854e-07, "logits/chosen": -3.456423044204712, "logits/rejected": -3.374634027481079, "logps/chosen": -201.5541534423828, "logps/rejected": -162.4180908203125, "loss": 0.5269, "rewards/accuracies": 0.75, "rewards/chosen": -0.02390441484749317, "rewards/margins": 0.8680941462516785, "rewards/rejected": -0.8919986486434937, "step": 882 }, { "epoch": 0.1, "learning_rate": 2.7359241484256117e-07, "logits/chosen": -2.9666905403137207, "logits/rejected": -3.176607370376587, "logps/chosen": -269.659912109375, "logps/rejected": -325.2682189941406, "loss": 0.4769, "rewards/accuracies": 0.75, "rewards/chosen": -0.002189110964536667, "rewards/margins": 1.018014907836914, "rewards/rejected": -1.0202040672302246, "step": 883 }, { "epoch": 0.1, "learning_rate": 2.735572983729369e-07, "logits/chosen": -3.3751349449157715, "logits/rejected": -3.492903232574463, "logps/chosen": -308.1128234863281, "logps/rejected": -337.5738525390625, "loss": 0.2612, "rewards/accuracies": 1.0, "rewards/chosen": 0.3474319577217102, "rewards/margins": 2.1836965084075928, "rewards/rejected": -1.8362646102905273, "step": 884 }, { "epoch": 0.1, "learning_rate": 2.735221819033126e-07, "logits/chosen": -2.870685577392578, "logits/rejected": -2.6847031116485596, "logps/chosen": -263.741455078125, "logps/rejected": -230.78762817382812, "loss": 0.4276, "rewards/accuracies": 0.75, "rewards/chosen": 0.4300898015499115, "rewards/margins": 0.8930739164352417, "rewards/rejected": -0.4629840850830078, "step": 885 }, { "epoch": 0.1, "learning_rate": 2.734870654336884e-07, "logits/chosen": -3.4636075496673584, "logits/rejected": -3.4705357551574707, "logps/chosen": -268.2343444824219, "logps/rejected": -314.6069030761719, "loss": 0.3378, "rewards/accuracies": 0.875, "rewards/chosen": -0.06484910845756531, "rewards/margins": 1.4335392713546753, "rewards/rejected": -1.498388409614563, "step": 886 }, { "epoch": 0.1, "learning_rate": 2.7345194896406413e-07, "logits/chosen": -3.4642715454101562, "logits/rejected": -3.288006544113159, "logps/chosen": -168.78045654296875, "logps/rejected": -226.36146545410156, "loss": 0.5041, "rewards/accuracies": 0.75, "rewards/chosen": -0.09172104299068451, "rewards/margins": 1.519975185394287, "rewards/rejected": -1.6116962432861328, "step": 887 }, { "epoch": 0.1, "learning_rate": 2.734168324944399e-07, "logits/chosen": -3.482382297515869, "logits/rejected": -3.510183095932007, "logps/chosen": -225.66082763671875, "logps/rejected": -179.1909637451172, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": 0.04460671544075012, "rewards/margins": 0.8375352025032043, "rewards/rejected": -0.7929285168647766, "step": 888 }, { "epoch": 0.1, "learning_rate": 2.7338171602481564e-07, "logits/chosen": -3.683042526245117, "logits/rejected": -3.3677713871002197, "logps/chosen": -290.6557922363281, "logps/rejected": -243.47109985351562, "loss": 0.4793, "rewards/accuracies": 0.75, "rewards/chosen": -0.3878090977668762, "rewards/margins": 1.1898540258407593, "rewards/rejected": -1.5776631832122803, "step": 889 }, { "epoch": 0.1, "learning_rate": 2.7334659955519134e-07, "logits/chosen": -3.787473678588867, "logits/rejected": -3.6991310119628906, "logps/chosen": -175.05210876464844, "logps/rejected": -180.95370483398438, "loss": 0.418, "rewards/accuracies": 0.875, "rewards/chosen": 0.010378304868936539, "rewards/margins": 1.3296396732330322, "rewards/rejected": -1.3192614316940308, "step": 890 }, { "epoch": 0.1, "learning_rate": 2.733114830855671e-07, "logits/chosen": -2.6952500343322754, "logits/rejected": -2.800529718399048, "logps/chosen": -235.1688995361328, "logps/rejected": -286.77069091796875, "loss": 0.4325, "rewards/accuracies": 0.75, "rewards/chosen": 0.2818378508090973, "rewards/margins": 0.9569096565246582, "rewards/rejected": -0.6750718355178833, "step": 891 }, { "epoch": 0.1, "learning_rate": 2.732763666159429e-07, "logits/chosen": -3.5838582515716553, "logits/rejected": -3.304550886154175, "logps/chosen": -289.6060791015625, "logps/rejected": -213.3480224609375, "loss": 0.7233, "rewards/accuracies": 0.5, "rewards/chosen": -0.07249242067337036, "rewards/margins": 0.8512861728668213, "rewards/rejected": -0.9237786531448364, "step": 892 }, { "epoch": 0.1, "learning_rate": 2.732412501463186e-07, "logits/chosen": -3.2545435428619385, "logits/rejected": -3.603292465209961, "logps/chosen": -293.82000732421875, "logps/rejected": -291.3116149902344, "loss": 0.3847, "rewards/accuracies": 0.875, "rewards/chosen": 0.29506242275238037, "rewards/margins": 1.4451227188110352, "rewards/rejected": -1.1500602960586548, "step": 893 }, { "epoch": 0.1, "learning_rate": 2.7320613367669435e-07, "logits/chosen": -3.7829883098602295, "logits/rejected": -3.668048858642578, "logps/chosen": -237.40284729003906, "logps/rejected": -257.0686340332031, "loss": 0.2857, "rewards/accuracies": 0.875, "rewards/chosen": 0.2788296937942505, "rewards/margins": 1.8316324949264526, "rewards/rejected": -1.5528028011322021, "step": 894 }, { "epoch": 0.1, "learning_rate": 2.731710172070701e-07, "logits/chosen": -2.9281060695648193, "logits/rejected": -2.9708497524261475, "logps/chosen": -242.07452392578125, "logps/rejected": -432.1681823730469, "loss": 0.2136, "rewards/accuracies": 1.0, "rewards/chosen": 0.2294134646654129, "rewards/margins": 2.5995736122131348, "rewards/rejected": -2.3701601028442383, "step": 895 }, { "epoch": 0.1, "learning_rate": 2.7313590073744586e-07, "logits/chosen": -2.279395580291748, "logits/rejected": -2.5472590923309326, "logps/chosen": -320.8092956542969, "logps/rejected": -230.53469848632812, "loss": 0.5916, "rewards/accuracies": 0.75, "rewards/chosen": -0.1299741566181183, "rewards/margins": 0.3151235282421112, "rewards/rejected": -0.4450976848602295, "step": 896 }, { "epoch": 0.1, "learning_rate": 2.731007842678216e-07, "logits/chosen": -3.36965012550354, "logits/rejected": -3.397615909576416, "logps/chosen": -161.10597229003906, "logps/rejected": -167.104736328125, "loss": 0.8153, "rewards/accuracies": 0.625, "rewards/chosen": 0.006314871832728386, "rewards/margins": 0.4814962148666382, "rewards/rejected": -0.4751812815666199, "step": 897 }, { "epoch": 0.1, "learning_rate": 2.730656677981973e-07, "logits/chosen": -3.173219680786133, "logits/rejected": -3.1868748664855957, "logps/chosen": -223.11569213867188, "logps/rejected": -240.66107177734375, "loss": 0.6403, "rewards/accuracies": 0.625, "rewards/chosen": -0.48031431436538696, "rewards/margins": 0.7474995851516724, "rewards/rejected": -1.2278138399124146, "step": 898 }, { "epoch": 0.1, "learning_rate": 2.7303055132857307e-07, "logits/chosen": -3.792707920074463, "logits/rejected": -3.7691166400909424, "logps/chosen": -180.13307189941406, "logps/rejected": -255.35284423828125, "loss": 0.3052, "rewards/accuracies": 1.0, "rewards/chosen": 0.39928966760635376, "rewards/margins": 1.502578616142273, "rewards/rejected": -1.1032888889312744, "step": 899 }, { "epoch": 0.1, "learning_rate": 2.729954348589488e-07, "logits/chosen": -3.1248369216918945, "logits/rejected": -3.1994898319244385, "logps/chosen": -235.48703002929688, "logps/rejected": -257.3819580078125, "loss": 0.559, "rewards/accuracies": 0.625, "rewards/chosen": -0.27769044041633606, "rewards/margins": 1.2040843963623047, "rewards/rejected": -1.481774926185608, "step": 900 }, { "epoch": 0.1, "learning_rate": 2.729603183893246e-07, "logits/chosen": -3.0169053077697754, "logits/rejected": -3.217195510864258, "logps/chosen": -356.239013671875, "logps/rejected": -248.81185913085938, "loss": 0.4039, "rewards/accuracies": 0.75, "rewards/chosen": 0.07552237808704376, "rewards/margins": 1.4441397190093994, "rewards/rejected": -1.368617296218872, "step": 901 }, { "epoch": 0.1, "learning_rate": 2.7292520191970033e-07, "logits/chosen": -3.0921874046325684, "logits/rejected": -3.1441752910614014, "logps/chosen": -246.675048828125, "logps/rejected": -227.9263916015625, "loss": 0.3748, "rewards/accuracies": 0.75, "rewards/chosen": 0.21944963932037354, "rewards/margins": 1.3254164457321167, "rewards/rejected": -1.1059668064117432, "step": 902 }, { "epoch": 0.1, "learning_rate": 2.7289008545007603e-07, "logits/chosen": -2.5496089458465576, "logits/rejected": -2.510307788848877, "logps/chosen": -278.2304382324219, "logps/rejected": -248.73326110839844, "loss": 0.2501, "rewards/accuracies": 0.875, "rewards/chosen": 0.06262153387069702, "rewards/margins": 1.8063157796859741, "rewards/rejected": -1.7436943054199219, "step": 903 }, { "epoch": 0.1, "learning_rate": 2.7285496898045184e-07, "logits/chosen": -3.1528491973876953, "logits/rejected": -2.5254809856414795, "logps/chosen": -306.5439453125, "logps/rejected": -283.7752380371094, "loss": 0.6391, "rewards/accuracies": 0.625, "rewards/chosen": -0.5247014760971069, "rewards/margins": 0.3074069917201996, "rewards/rejected": -0.8321084976196289, "step": 904 }, { "epoch": 0.1, "learning_rate": 2.728198525108276e-07, "logits/chosen": -3.4172539710998535, "logits/rejected": -3.4588940143585205, "logps/chosen": -246.3363494873047, "logps/rejected": -179.74822998046875, "loss": 0.3563, "rewards/accuracies": 0.875, "rewards/chosen": -0.08095742762088776, "rewards/margins": 1.3279359340667725, "rewards/rejected": -1.40889310836792, "step": 905 }, { "epoch": 0.1, "learning_rate": 2.727847360412033e-07, "logits/chosen": -3.248837471008301, "logits/rejected": -3.330338954925537, "logps/chosen": -115.96588134765625, "logps/rejected": -240.95388793945312, "loss": 0.3741, "rewards/accuracies": 0.875, "rewards/chosen": 0.1359207034111023, "rewards/margins": 1.4903448820114136, "rewards/rejected": -1.354424238204956, "step": 906 }, { "epoch": 0.1, "learning_rate": 2.7274961957157905e-07, "logits/chosen": -3.1344480514526367, "logits/rejected": -3.188601493835449, "logps/chosen": -265.72772216796875, "logps/rejected": -432.5203857421875, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": 0.004061520099639893, "rewards/margins": 1.6172616481781006, "rewards/rejected": -1.6131999492645264, "step": 907 }, { "epoch": 0.1, "learning_rate": 2.727145031019548e-07, "logits/chosen": -2.7339181900024414, "logits/rejected": -2.841093063354492, "logps/chosen": -196.99000549316406, "logps/rejected": -217.36041259765625, "loss": 0.6587, "rewards/accuracies": 0.625, "rewards/chosen": 0.14531797170639038, "rewards/margins": 0.36278384923934937, "rewards/rejected": -0.21746587753295898, "step": 908 }, { "epoch": 0.1, "learning_rate": 2.7267938663233056e-07, "logits/chosen": -2.3694746494293213, "logits/rejected": -2.571016311645508, "logps/chosen": -303.3848876953125, "logps/rejected": -279.5934143066406, "loss": 0.7169, "rewards/accuracies": 0.875, "rewards/chosen": -0.14588168263435364, "rewards/margins": 0.15638813376426697, "rewards/rejected": -0.302269846200943, "step": 909 }, { "epoch": 0.1, "learning_rate": 2.726442701627063e-07, "logits/chosen": -2.7573957443237305, "logits/rejected": -2.4185047149658203, "logps/chosen": -405.59478759765625, "logps/rejected": -390.17626953125, "loss": 0.3234, "rewards/accuracies": 0.875, "rewards/chosen": 0.0948440432548523, "rewards/margins": 1.5815706253051758, "rewards/rejected": -1.4867265224456787, "step": 910 }, { "epoch": 0.11, "learning_rate": 2.72609153693082e-07, "logits/chosen": -1.918601632118225, "logits/rejected": -2.0897252559661865, "logps/chosen": -403.3478088378906, "logps/rejected": -339.4903869628906, "loss": 0.3581, "rewards/accuracies": 0.875, "rewards/chosen": 0.29311293363571167, "rewards/margins": 1.606999397277832, "rewards/rejected": -1.3138864040374756, "step": 911 }, { "epoch": 0.11, "learning_rate": 2.7257403722345776e-07, "logits/chosen": -3.1630916595458984, "logits/rejected": -3.2855231761932373, "logps/chosen": -163.98687744140625, "logps/rejected": -200.6642303466797, "loss": 0.4939, "rewards/accuracies": 0.625, "rewards/chosen": 0.004030302166938782, "rewards/margins": 1.1513805389404297, "rewards/rejected": -1.1473501920700073, "step": 912 }, { "epoch": 0.11, "learning_rate": 2.7253892075383357e-07, "logits/chosen": -3.1772027015686035, "logits/rejected": -2.925394058227539, "logps/chosen": -384.686279296875, "logps/rejected": -375.16949462890625, "loss": 0.634, "rewards/accuracies": 0.625, "rewards/chosen": -0.3023124933242798, "rewards/margins": 0.5147613883018494, "rewards/rejected": -0.8170739412307739, "step": 913 }, { "epoch": 0.11, "learning_rate": 2.7250380428420927e-07, "logits/chosen": -2.9498748779296875, "logits/rejected": -2.8009510040283203, "logps/chosen": -205.6226348876953, "logps/rejected": -242.76422119140625, "loss": 0.5356, "rewards/accuracies": 0.75, "rewards/chosen": -0.21075215935707092, "rewards/margins": 0.7407183647155762, "rewards/rejected": -0.9514705538749695, "step": 914 }, { "epoch": 0.11, "learning_rate": 2.7246868781458503e-07, "logits/chosen": -3.302330493927002, "logits/rejected": -3.010176658630371, "logps/chosen": -420.05255126953125, "logps/rejected": -299.21136474609375, "loss": 0.4446, "rewards/accuracies": 0.875, "rewards/chosen": 0.2245606631040573, "rewards/margins": 1.1491971015930176, "rewards/rejected": -0.9246364831924438, "step": 915 }, { "epoch": 0.11, "learning_rate": 2.724335713449608e-07, "logits/chosen": -2.8898839950561523, "logits/rejected": -3.015110731124878, "logps/chosen": -287.5570373535156, "logps/rejected": -335.2170104980469, "loss": 0.2438, "rewards/accuracies": 0.875, "rewards/chosen": -0.01172279566526413, "rewards/margins": 2.0249836444854736, "rewards/rejected": -2.0367064476013184, "step": 916 }, { "epoch": 0.11, "learning_rate": 2.7239845487533653e-07, "logits/chosen": -3.9990854263305664, "logits/rejected": -3.486722469329834, "logps/chosen": -389.77490234375, "logps/rejected": -188.4969024658203, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": 0.025478176772594452, "rewards/margins": 2.083963394165039, "rewards/rejected": -2.0584850311279297, "step": 917 }, { "epoch": 0.11, "learning_rate": 2.723633384057123e-07, "logits/chosen": -2.306894540786743, "logits/rejected": -2.420416831970215, "logps/chosen": -440.85333251953125, "logps/rejected": -234.17715454101562, "loss": 0.6276, "rewards/accuracies": 0.625, "rewards/chosen": 0.07106418907642365, "rewards/margins": 0.3188053369522095, "rewards/rejected": -0.24774114787578583, "step": 918 }, { "epoch": 0.11, "learning_rate": 2.72328221936088e-07, "logits/chosen": -2.4436309337615967, "logits/rejected": -2.366974353790283, "logps/chosen": -336.9437561035156, "logps/rejected": -248.24087524414062, "loss": 0.704, "rewards/accuracies": 0.625, "rewards/chosen": -0.29611319303512573, "rewards/margins": 0.31762832403182983, "rewards/rejected": -0.6137415170669556, "step": 919 }, { "epoch": 0.11, "learning_rate": 2.7229310546646374e-07, "logits/chosen": -3.344815492630005, "logits/rejected": -3.564399242401123, "logps/chosen": -174.67694091796875, "logps/rejected": -193.72999572753906, "loss": 0.2455, "rewards/accuracies": 1.0, "rewards/chosen": 0.4308107793331146, "rewards/margins": 1.627485752105713, "rewards/rejected": -1.1966749429702759, "step": 920 }, { "epoch": 0.11, "learning_rate": 2.722579889968395e-07, "logits/chosen": -3.5176708698272705, "logits/rejected": -3.2607157230377197, "logps/chosen": -300.05047607421875, "logps/rejected": -294.6630859375, "loss": 0.6199, "rewards/accuracies": 0.625, "rewards/chosen": -0.43917638063430786, "rewards/margins": 0.5336997509002686, "rewards/rejected": -0.9728761911392212, "step": 921 }, { "epoch": 0.11, "learning_rate": 2.7222287252721525e-07, "logits/chosen": -2.4959073066711426, "logits/rejected": -2.5892107486724854, "logps/chosen": -199.0511474609375, "logps/rejected": -280.9212646484375, "loss": 0.8191, "rewards/accuracies": 0.5, "rewards/chosen": -0.36172452569007874, "rewards/margins": -0.06297557055950165, "rewards/rejected": -0.2987489700317383, "step": 922 }, { "epoch": 0.11, "learning_rate": 2.72187756057591e-07, "logits/chosen": -3.241739273071289, "logits/rejected": -3.280200958251953, "logps/chosen": -300.1468200683594, "logps/rejected": -202.93215942382812, "loss": 0.3672, "rewards/accuracies": 0.75, "rewards/chosen": 0.11721419543027878, "rewards/margins": 1.380391240119934, "rewards/rejected": -1.2631769180297852, "step": 923 }, { "epoch": 0.11, "learning_rate": 2.7215263958796676e-07, "logits/chosen": -2.7592411041259766, "logits/rejected": -2.867053747177124, "logps/chosen": -206.97164916992188, "logps/rejected": -286.4378662109375, "loss": 0.5792, "rewards/accuracies": 0.5, "rewards/chosen": 0.12773418426513672, "rewards/margins": 0.702996015548706, "rewards/rejected": -0.5752618312835693, "step": 924 }, { "epoch": 0.11, "learning_rate": 2.7211752311834246e-07, "logits/chosen": -3.023634672164917, "logits/rejected": -3.0151944160461426, "logps/chosen": -167.0782928466797, "logps/rejected": -216.24655151367188, "loss": 0.4429, "rewards/accuracies": 0.875, "rewards/chosen": 0.39342784881591797, "rewards/margins": 1.2722269296646118, "rewards/rejected": -0.8787990808486938, "step": 925 }, { "epoch": 0.11, "learning_rate": 2.7208240664871827e-07, "logits/chosen": -2.6129889488220215, "logits/rejected": -2.501737594604492, "logps/chosen": -444.32354736328125, "logps/rejected": -505.006591796875, "loss": 0.4616, "rewards/accuracies": 0.875, "rewards/chosen": 0.3884381055831909, "rewards/margins": 0.7828586101531982, "rewards/rejected": -0.3944205343723297, "step": 926 }, { "epoch": 0.11, "learning_rate": 2.7204729017909397e-07, "logits/chosen": -3.0094833374023438, "logits/rejected": -2.943114757537842, "logps/chosen": -360.1683654785156, "logps/rejected": -304.1644287109375, "loss": 0.7426, "rewards/accuracies": 0.5, "rewards/chosen": -0.3677533268928528, "rewards/margins": 0.00048439204692840576, "rewards/rejected": -0.36823770403862, "step": 927 }, { "epoch": 0.11, "learning_rate": 2.720121737094697e-07, "logits/chosen": -3.487783432006836, "logits/rejected": -3.646026134490967, "logps/chosen": -183.87905883789062, "logps/rejected": -213.95388793945312, "loss": 0.4562, "rewards/accuracies": 0.875, "rewards/chosen": -0.08492925018072128, "rewards/margins": 1.190995693206787, "rewards/rejected": -1.2759250402450562, "step": 928 }, { "epoch": 0.11, "learning_rate": 2.719770572398455e-07, "logits/chosen": -3.632976531982422, "logits/rejected": -3.083171844482422, "logps/chosen": -386.4703369140625, "logps/rejected": -171.87962341308594, "loss": 0.3675, "rewards/accuracies": 0.875, "rewards/chosen": 0.01657090336084366, "rewards/margins": 1.3287577629089355, "rewards/rejected": -1.3121867179870605, "step": 929 }, { "epoch": 0.11, "learning_rate": 2.7194194077022123e-07, "logits/chosen": -3.493985652923584, "logits/rejected": -3.3654446601867676, "logps/chosen": -154.06573486328125, "logps/rejected": -172.19325256347656, "loss": 0.343, "rewards/accuracies": 1.0, "rewards/chosen": 0.11701072752475739, "rewards/margins": 1.1649894714355469, "rewards/rejected": -1.0479786396026611, "step": 930 }, { "epoch": 0.11, "learning_rate": 2.71906824300597e-07, "logits/chosen": -3.280346393585205, "logits/rejected": -3.0646750926971436, "logps/chosen": -302.97088623046875, "logps/rejected": -255.5186767578125, "loss": 0.6315, "rewards/accuracies": 0.625, "rewards/chosen": -0.18493299186229706, "rewards/margins": 0.6485589742660522, "rewards/rejected": -0.8334920406341553, "step": 931 }, { "epoch": 0.11, "learning_rate": 2.7187170783097274e-07, "logits/chosen": -3.14422607421875, "logits/rejected": -3.1548752784729004, "logps/chosen": -259.3439025878906, "logps/rejected": -301.1274108886719, "loss": 0.3588, "rewards/accuracies": 0.75, "rewards/chosen": -0.05870581790804863, "rewards/margins": 1.8426764011383057, "rewards/rejected": -1.9013820886611938, "step": 932 }, { "epoch": 0.11, "learning_rate": 2.7183659136134844e-07, "logits/chosen": -2.8842153549194336, "logits/rejected": -2.8962459564208984, "logps/chosen": -154.29705810546875, "logps/rejected": -243.53192138671875, "loss": 0.3651, "rewards/accuracies": 0.75, "rewards/chosen": 0.09773492813110352, "rewards/margins": 1.3322856426239014, "rewards/rejected": -1.2345507144927979, "step": 933 }, { "epoch": 0.11, "learning_rate": 2.718014748917242e-07, "logits/chosen": -2.6701111793518066, "logits/rejected": -2.807131290435791, "logps/chosen": -218.81524658203125, "logps/rejected": -278.7330017089844, "loss": 0.5134, "rewards/accuracies": 0.875, "rewards/chosen": -0.23717649281024933, "rewards/margins": 1.611145257949829, "rewards/rejected": -1.848321795463562, "step": 934 }, { "epoch": 0.11, "learning_rate": 2.7176635842209994e-07, "logits/chosen": -2.975229024887085, "logits/rejected": -2.8885738849639893, "logps/chosen": -122.40269470214844, "logps/rejected": -128.49169921875, "loss": 0.5268, "rewards/accuracies": 0.75, "rewards/chosen": -0.09790345281362534, "rewards/margins": 0.7106057405471802, "rewards/rejected": -0.8085091710090637, "step": 935 }, { "epoch": 0.11, "learning_rate": 2.717312419524757e-07, "logits/chosen": -3.4507875442504883, "logits/rejected": -3.414834499359131, "logps/chosen": -243.28448486328125, "logps/rejected": -265.5135803222656, "loss": 0.2805, "rewards/accuracies": 0.875, "rewards/chosen": 0.5943059921264648, "rewards/margins": 1.720097541809082, "rewards/rejected": -1.1257915496826172, "step": 936 }, { "epoch": 0.11, "learning_rate": 2.7169612548285145e-07, "logits/chosen": -2.889873504638672, "logits/rejected": -2.9832043647766113, "logps/chosen": -197.5057373046875, "logps/rejected": -114.00299835205078, "loss": 0.5697, "rewards/accuracies": 0.5, "rewards/chosen": -0.16511191427707672, "rewards/margins": 0.49695950746536255, "rewards/rejected": -0.6620713472366333, "step": 937 }, { "epoch": 0.11, "learning_rate": 2.716610090132272e-07, "logits/chosen": -3.812404155731201, "logits/rejected": -3.8224549293518066, "logps/chosen": -139.3220672607422, "logps/rejected": -162.1681365966797, "loss": 0.4036, "rewards/accuracies": 0.75, "rewards/chosen": -0.0451909676194191, "rewards/margins": 1.1646888256072998, "rewards/rejected": -1.2098798751831055, "step": 938 }, { "epoch": 0.11, "learning_rate": 2.7162589254360296e-07, "logits/chosen": -3.9208450317382812, "logits/rejected": -3.644503116607666, "logps/chosen": -429.77001953125, "logps/rejected": -298.3420104980469, "loss": 0.761, "rewards/accuracies": 0.625, "rewards/chosen": -0.8798959851264954, "rewards/margins": 0.1864919811487198, "rewards/rejected": -1.0663880109786987, "step": 939 }, { "epoch": 0.11, "learning_rate": 2.715907760739787e-07, "logits/chosen": -2.138636589050293, "logits/rejected": -2.1230974197387695, "logps/chosen": -284.8170166015625, "logps/rejected": -295.7038269042969, "loss": 0.5185, "rewards/accuracies": 0.625, "rewards/chosen": -0.1885736733675003, "rewards/margins": 0.983759880065918, "rewards/rejected": -1.1723335981369019, "step": 940 }, { "epoch": 0.11, "learning_rate": 2.715556596043544e-07, "logits/chosen": -3.553351402282715, "logits/rejected": -3.8664088249206543, "logps/chosen": -247.04934692382812, "logps/rejected": -362.0994567871094, "loss": 0.4512, "rewards/accuracies": 0.875, "rewards/chosen": -0.23752403259277344, "rewards/margins": 1.1279785633087158, "rewards/rejected": -1.3655025959014893, "step": 941 }, { "epoch": 0.11, "learning_rate": 2.7152054313473017e-07, "logits/chosen": -2.975712299346924, "logits/rejected": -3.1567800045013428, "logps/chosen": -306.24237060546875, "logps/rejected": -264.3021240234375, "loss": 0.4559, "rewards/accuracies": 0.75, "rewards/chosen": -0.09428466856479645, "rewards/margins": 1.0892207622528076, "rewards/rejected": -1.1835055351257324, "step": 942 }, { "epoch": 0.11, "learning_rate": 2.714854266651059e-07, "logits/chosen": -3.493198871612549, "logits/rejected": -3.3508262634277344, "logps/chosen": -302.9748229980469, "logps/rejected": -291.0611267089844, "loss": 0.5418, "rewards/accuracies": 0.75, "rewards/chosen": -0.351968377828598, "rewards/margins": 1.118449091911316, "rewards/rejected": -1.4704174995422363, "step": 943 }, { "epoch": 0.11, "learning_rate": 2.714503101954817e-07, "logits/chosen": -3.0290277004241943, "logits/rejected": -3.262327194213867, "logps/chosen": -270.17156982421875, "logps/rejected": -286.8800354003906, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": 0.09464433789253235, "rewards/margins": 1.1609529256820679, "rewards/rejected": -1.066308617591858, "step": 944 }, { "epoch": 0.11, "learning_rate": 2.7141519372585743e-07, "logits/chosen": -2.8107359409332275, "logits/rejected": -2.7836945056915283, "logps/chosen": -263.7593688964844, "logps/rejected": -237.64352416992188, "loss": 0.5396, "rewards/accuracies": 0.5, "rewards/chosen": -0.025854304432868958, "rewards/margins": 1.1632884740829468, "rewards/rejected": -1.1891427040100098, "step": 945 }, { "epoch": 0.11, "learning_rate": 2.7138007725623313e-07, "logits/chosen": -2.8193705081939697, "logits/rejected": -2.9974308013916016, "logps/chosen": -303.5420837402344, "logps/rejected": -171.15150451660156, "loss": 0.6617, "rewards/accuracies": 0.5, "rewards/chosen": 0.16644184291362762, "rewards/margins": 0.5049163103103638, "rewards/rejected": -0.3384745121002197, "step": 946 }, { "epoch": 0.11, "learning_rate": 2.7134496078660894e-07, "logits/chosen": -3.325385093688965, "logits/rejected": -3.2183837890625, "logps/chosen": -197.38986206054688, "logps/rejected": -238.8984832763672, "loss": 0.3043, "rewards/accuracies": 1.0, "rewards/chosen": 0.14208994805812836, "rewards/margins": 1.0950318574905396, "rewards/rejected": -0.9529418349266052, "step": 947 }, { "epoch": 0.11, "learning_rate": 2.7130984431698464e-07, "logits/chosen": -3.369166612625122, "logits/rejected": -3.4006786346435547, "logps/chosen": -157.16506958007812, "logps/rejected": -160.9402313232422, "loss": 0.4501, "rewards/accuracies": 0.75, "rewards/chosen": -0.14649298787117004, "rewards/margins": 1.0613946914672852, "rewards/rejected": -1.2078876495361328, "step": 948 }, { "epoch": 0.11, "learning_rate": 2.712747278473604e-07, "logits/chosen": -3.165745735168457, "logits/rejected": -3.417111873626709, "logps/chosen": -222.613525390625, "logps/rejected": -263.4788513183594, "loss": 0.5029, "rewards/accuracies": 0.875, "rewards/chosen": -0.18515644967556, "rewards/margins": 1.022705078125, "rewards/rejected": -1.2078614234924316, "step": 949 }, { "epoch": 0.11, "learning_rate": 2.7123961137773615e-07, "logits/chosen": -3.3367128372192383, "logits/rejected": -3.596122980117798, "logps/chosen": -325.55694580078125, "logps/rejected": -176.81243896484375, "loss": 0.5394, "rewards/accuracies": 0.875, "rewards/chosen": -0.6319919228553772, "rewards/margins": 0.41301268339157104, "rewards/rejected": -1.0450046062469482, "step": 950 }, { "epoch": 0.11, "learning_rate": 2.712044949081119e-07, "logits/chosen": -3.745450019836426, "logits/rejected": -3.401707887649536, "logps/chosen": -331.63543701171875, "logps/rejected": -164.2527618408203, "loss": 0.2693, "rewards/accuracies": 1.0, "rewards/chosen": 0.1420428454875946, "rewards/margins": 1.5322678089141846, "rewards/rejected": -1.390224814414978, "step": 951 }, { "epoch": 0.11, "learning_rate": 2.7116937843848765e-07, "logits/chosen": -2.7094602584838867, "logits/rejected": -2.7347474098205566, "logps/chosen": -455.28558349609375, "logps/rejected": -379.8451232910156, "loss": 0.4634, "rewards/accuracies": 0.75, "rewards/chosen": -0.03492278605699539, "rewards/margins": 0.9114267230033875, "rewards/rejected": -0.9463495016098022, "step": 952 }, { "epoch": 0.11, "learning_rate": 2.711342619688634e-07, "logits/chosen": -3.1910560131073, "logits/rejected": -2.6987390518188477, "logps/chosen": -333.8016662597656, "logps/rejected": -243.1864013671875, "loss": 0.322, "rewards/accuracies": 0.875, "rewards/chosen": -0.13402239978313446, "rewards/margins": 1.1437492370605469, "rewards/rejected": -1.2777715921401978, "step": 953 }, { "epoch": 0.11, "learning_rate": 2.710991454992391e-07, "logits/chosen": -2.849750518798828, "logits/rejected": -2.8113839626312256, "logps/chosen": -252.40562438964844, "logps/rejected": -261.11541748046875, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -0.15758399665355682, "rewards/margins": 0.8513338565826416, "rewards/rejected": -1.0089179277420044, "step": 954 }, { "epoch": 0.11, "learning_rate": 2.7106402902961486e-07, "logits/chosen": -2.7115280628204346, "logits/rejected": -2.7580766677856445, "logps/chosen": -495.89532470703125, "logps/rejected": -405.28082275390625, "loss": 0.3589, "rewards/accuracies": 0.875, "rewards/chosen": 0.460294246673584, "rewards/margins": 1.2527241706848145, "rewards/rejected": -0.7924299240112305, "step": 955 }, { "epoch": 0.11, "learning_rate": 2.710289125599906e-07, "logits/chosen": -3.030745029449463, "logits/rejected": -3.0923924446105957, "logps/chosen": -208.54684448242188, "logps/rejected": -281.8843078613281, "loss": 0.3004, "rewards/accuracies": 0.75, "rewards/chosen": 0.15190669894218445, "rewards/margins": 2.155510902404785, "rewards/rejected": -2.0036041736602783, "step": 956 }, { "epoch": 0.11, "learning_rate": 2.7099379609036637e-07, "logits/chosen": -3.021512031555176, "logits/rejected": -3.050847053527832, "logps/chosen": -114.19599914550781, "logps/rejected": -190.4558868408203, "loss": 0.5362, "rewards/accuracies": 0.75, "rewards/chosen": -0.1878015398979187, "rewards/margins": 1.601518154144287, "rewards/rejected": -1.7893197536468506, "step": 957 }, { "epoch": 0.11, "learning_rate": 2.709586796207421e-07, "logits/chosen": -2.7128593921661377, "logits/rejected": -2.8246259689331055, "logps/chosen": -439.3732604980469, "logps/rejected": -315.50213623046875, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": 0.17414307594299316, "rewards/margins": 1.7964141368865967, "rewards/rejected": -1.6222712993621826, "step": 958 }, { "epoch": 0.11, "learning_rate": 2.709235631511178e-07, "logits/chosen": -2.8361897468566895, "logits/rejected": -3.0565404891967773, "logps/chosen": -168.5650177001953, "logps/rejected": -226.47120666503906, "loss": 0.5115, "rewards/accuracies": 0.75, "rewards/chosen": -0.5012440085411072, "rewards/margins": 0.6525136232376099, "rewards/rejected": -1.1537576913833618, "step": 959 }, { "epoch": 0.11, "learning_rate": 2.7088844668149363e-07, "logits/chosen": -2.7673158645629883, "logits/rejected": -2.9847230911254883, "logps/chosen": -155.2431640625, "logps/rejected": -146.95323181152344, "loss": 0.6291, "rewards/accuracies": 0.75, "rewards/chosen": -0.11602359265089035, "rewards/margins": 0.9629310369491577, "rewards/rejected": -1.0789545774459839, "step": 960 }, { "epoch": 0.11, "learning_rate": 2.708533302118694e-07, "logits/chosen": -2.4599609375, "logits/rejected": -2.3996639251708984, "logps/chosen": -435.77947998046875, "logps/rejected": -361.87213134765625, "loss": 0.6775, "rewards/accuracies": 0.625, "rewards/chosen": 0.2719172537326813, "rewards/margins": 0.47527581453323364, "rewards/rejected": -0.20335856080055237, "step": 961 }, { "epoch": 0.11, "learning_rate": 2.708182137422451e-07, "logits/chosen": -3.1659483909606934, "logits/rejected": -3.0268325805664062, "logps/chosen": -264.2563171386719, "logps/rejected": -219.39776611328125, "loss": 0.5216, "rewards/accuracies": 0.75, "rewards/chosen": 0.2708269953727722, "rewards/margins": 0.5214576721191406, "rewards/rejected": -0.2506306767463684, "step": 962 }, { "epoch": 0.11, "learning_rate": 2.7078309727262084e-07, "logits/chosen": -2.314093589782715, "logits/rejected": -2.2031283378601074, "logps/chosen": -188.3428497314453, "logps/rejected": -242.99642944335938, "loss": 0.6951, "rewards/accuracies": 0.625, "rewards/chosen": 0.10376887768507004, "rewards/margins": 0.18178167939186096, "rewards/rejected": -0.07801275700330734, "step": 963 }, { "epoch": 0.11, "learning_rate": 2.707479808029966e-07, "logits/chosen": -3.683884620666504, "logits/rejected": -3.606537342071533, "logps/chosen": -190.3939208984375, "logps/rejected": -177.0293731689453, "loss": 0.5049, "rewards/accuracies": 0.625, "rewards/chosen": 0.09123976528644562, "rewards/margins": 0.7861461639404297, "rewards/rejected": -0.6949064135551453, "step": 964 }, { "epoch": 0.11, "learning_rate": 2.7071286433337235e-07, "logits/chosen": -2.5376155376434326, "logits/rejected": -2.6280434131622314, "logps/chosen": -388.2626647949219, "logps/rejected": -300.81170654296875, "loss": 0.3047, "rewards/accuracies": 1.0, "rewards/chosen": 0.6321220397949219, "rewards/margins": 1.5867618322372437, "rewards/rejected": -0.9546397924423218, "step": 965 }, { "epoch": 0.11, "learning_rate": 2.706777478637481e-07, "logits/chosen": -3.2188303470611572, "logits/rejected": -3.3861145973205566, "logps/chosen": -209.6493682861328, "logps/rejected": -205.88406372070312, "loss": 0.395, "rewards/accuracies": 0.875, "rewards/chosen": 0.3750436305999756, "rewards/margins": 1.3258228302001953, "rewards/rejected": -0.9507793188095093, "step": 966 }, { "epoch": 0.11, "learning_rate": 2.706426313941238e-07, "logits/chosen": -3.0122857093811035, "logits/rejected": -3.042473077774048, "logps/chosen": -439.5854797363281, "logps/rejected": -385.3846435546875, "loss": 0.5461, "rewards/accuracies": 0.75, "rewards/chosen": 0.20541119575500488, "rewards/margins": 0.6080130338668823, "rewards/rejected": -0.40260183811187744, "step": 967 }, { "epoch": 0.11, "learning_rate": 2.7060751492449956e-07, "logits/chosen": -3.0006396770477295, "logits/rejected": -2.839611291885376, "logps/chosen": -250.30145263671875, "logps/rejected": -214.79049682617188, "loss": 0.3444, "rewards/accuracies": 0.875, "rewards/chosen": 0.05845535174012184, "rewards/margins": 1.7147998809814453, "rewards/rejected": -1.6563444137573242, "step": 968 }, { "epoch": 0.11, "learning_rate": 2.7057239845487536e-07, "logits/chosen": -3.455221652984619, "logits/rejected": -3.0053658485412598, "logps/chosen": -274.49749755859375, "logps/rejected": -263.79644775390625, "loss": 0.5441, "rewards/accuracies": 0.625, "rewards/chosen": -0.2278425395488739, "rewards/margins": 0.7620329856872559, "rewards/rejected": -0.9898754954338074, "step": 969 }, { "epoch": 0.11, "learning_rate": 2.7053728198525106e-07, "logits/chosen": -2.83850359916687, "logits/rejected": -2.9234111309051514, "logps/chosen": -200.509033203125, "logps/rejected": -211.42225646972656, "loss": 0.3536, "rewards/accuracies": 0.75, "rewards/chosen": 0.18831130862236023, "rewards/margins": 1.7086939811706543, "rewards/rejected": -1.5203826427459717, "step": 970 }, { "epoch": 0.11, "learning_rate": 2.705021655156268e-07, "logits/chosen": -3.025434732437134, "logits/rejected": -3.2490336894989014, "logps/chosen": -203.34445190429688, "logps/rejected": -320.43096923828125, "loss": 0.7002, "rewards/accuracies": 0.625, "rewards/chosen": 0.4108351171016693, "rewards/margins": 0.7105294466018677, "rewards/rejected": -0.299694299697876, "step": 971 }, { "epoch": 0.11, "learning_rate": 2.7046704904600257e-07, "logits/chosen": -2.8695127964019775, "logits/rejected": -3.0990819931030273, "logps/chosen": -278.72174072265625, "logps/rejected": -235.55712890625, "loss": 0.6387, "rewards/accuracies": 0.875, "rewards/chosen": -0.027105100452899933, "rewards/margins": 0.9839267730712891, "rewards/rejected": -1.011031985282898, "step": 972 }, { "epoch": 0.11, "learning_rate": 2.7043193257637833e-07, "logits/chosen": -3.7534422874450684, "logits/rejected": -3.6492557525634766, "logps/chosen": -210.11024475097656, "logps/rejected": -231.05670166015625, "loss": 0.2903, "rewards/accuracies": 1.0, "rewards/chosen": 0.035855215042829514, "rewards/margins": 1.829070806503296, "rewards/rejected": -1.7932155132293701, "step": 973 }, { "epoch": 0.11, "learning_rate": 2.703968161067541e-07, "logits/chosen": -3.133993625640869, "logits/rejected": -2.5676791667938232, "logps/chosen": -329.77996826171875, "logps/rejected": -317.7174377441406, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": 0.042493656277656555, "rewards/margins": 1.4304752349853516, "rewards/rejected": -1.3879815340042114, "step": 974 }, { "epoch": 0.11, "learning_rate": 2.703616996371298e-07, "logits/chosen": -2.669872283935547, "logits/rejected": -2.8968656063079834, "logps/chosen": -182.85948181152344, "logps/rejected": -162.0805206298828, "loss": 0.3091, "rewards/accuracies": 0.875, "rewards/chosen": 0.4360937476158142, "rewards/margins": 1.4947019815444946, "rewards/rejected": -1.0586082935333252, "step": 975 }, { "epoch": 0.11, "learning_rate": 2.7032658316750554e-07, "logits/chosen": -3.2783894538879395, "logits/rejected": -3.618631362915039, "logps/chosen": -152.9832763671875, "logps/rejected": -190.10696411132812, "loss": 0.5047, "rewards/accuracies": 0.625, "rewards/chosen": 0.11848974227905273, "rewards/margins": 1.1498838663101196, "rewards/rejected": -1.031394124031067, "step": 976 }, { "epoch": 0.11, "learning_rate": 2.702914666978813e-07, "logits/chosen": -2.243051528930664, "logits/rejected": -2.3684303760528564, "logps/chosen": -355.09503173828125, "logps/rejected": -228.03611755371094, "loss": 0.5008, "rewards/accuracies": 0.75, "rewards/chosen": -0.0780981108546257, "rewards/margins": 0.7269706130027771, "rewards/rejected": -0.8050686717033386, "step": 977 }, { "epoch": 0.11, "learning_rate": 2.7025635022825704e-07, "logits/chosen": -2.8946638107299805, "logits/rejected": -2.8324694633483887, "logps/chosen": -151.55172729492188, "logps/rejected": -208.7655487060547, "loss": 0.3202, "rewards/accuracies": 0.875, "rewards/chosen": -0.08181506395339966, "rewards/margins": 1.2029764652252197, "rewards/rejected": -1.2847914695739746, "step": 978 }, { "epoch": 0.11, "learning_rate": 2.702212337586328e-07, "logits/chosen": -2.8441410064697266, "logits/rejected": -2.9558866024017334, "logps/chosen": -522.5399780273438, "logps/rejected": -342.52520751953125, "loss": 0.2877, "rewards/accuracies": 0.875, "rewards/chosen": 0.48758476972579956, "rewards/margins": 1.563379168510437, "rewards/rejected": -1.0757943391799927, "step": 979 }, { "epoch": 0.11, "learning_rate": 2.701861172890085e-07, "logits/chosen": -3.30424165725708, "logits/rejected": -3.3372042179107666, "logps/chosen": -343.93792724609375, "logps/rejected": -470.9925842285156, "loss": 0.4793, "rewards/accuracies": 0.625, "rewards/chosen": -0.21383866667747498, "rewards/margins": 1.3687883615493774, "rewards/rejected": -1.5826270580291748, "step": 980 }, { "epoch": 0.11, "learning_rate": 2.701510008193843e-07, "logits/chosen": -3.6467607021331787, "logits/rejected": -3.415522575378418, "logps/chosen": -307.7083740234375, "logps/rejected": -232.10049438476562, "loss": 0.7704, "rewards/accuracies": 0.75, "rewards/chosen": -0.5199838280677795, "rewards/margins": 0.3384891152381897, "rewards/rejected": -0.8584729433059692, "step": 981 }, { "epoch": 0.11, "learning_rate": 2.7011588434976006e-07, "logits/chosen": -2.210667133331299, "logits/rejected": -2.431813955307007, "logps/chosen": -383.2902526855469, "logps/rejected": -377.5415954589844, "loss": 0.536, "rewards/accuracies": 0.875, "rewards/chosen": 0.22913381457328796, "rewards/margins": 0.573865532875061, "rewards/rejected": -0.34473171830177307, "step": 982 }, { "epoch": 0.11, "learning_rate": 2.7008076788013576e-07, "logits/chosen": -3.2514007091522217, "logits/rejected": -3.481778144836426, "logps/chosen": -143.31439208984375, "logps/rejected": -201.0032958984375, "loss": 0.5603, "rewards/accuracies": 0.875, "rewards/chosen": -0.14243854582309723, "rewards/margins": 0.7480747699737549, "rewards/rejected": -0.8905133605003357, "step": 983 }, { "epoch": 0.11, "learning_rate": 2.700456514105115e-07, "logits/chosen": -3.275930404663086, "logits/rejected": -3.127488851547241, "logps/chosen": -339.2230529785156, "logps/rejected": -323.06976318359375, "loss": 0.2795, "rewards/accuracies": 1.0, "rewards/chosen": 0.37228983640670776, "rewards/margins": 1.5589337348937988, "rewards/rejected": -1.1866439580917358, "step": 984 }, { "epoch": 0.11, "learning_rate": 2.7001053494088727e-07, "logits/chosen": -3.872694492340088, "logits/rejected": -3.9138529300689697, "logps/chosen": -101.06723022460938, "logps/rejected": -172.11981201171875, "loss": 0.4884, "rewards/accuracies": 0.75, "rewards/chosen": -0.38010311126708984, "rewards/margins": 0.6716045141220093, "rewards/rejected": -1.0517076253890991, "step": 985 }, { "epoch": 0.11, "learning_rate": 2.69975418471263e-07, "logits/chosen": -3.3201053142547607, "logits/rejected": -3.2113113403320312, "logps/chosen": -291.1360778808594, "logps/rejected": -248.64578247070312, "loss": 0.4353, "rewards/accuracies": 0.875, "rewards/chosen": 0.15845462679862976, "rewards/margins": 0.7811118364334106, "rewards/rejected": -0.6226572394371033, "step": 986 }, { "epoch": 0.11, "learning_rate": 2.699403020016388e-07, "logits/chosen": -2.7711539268493652, "logits/rejected": -3.1714000701904297, "logps/chosen": -334.8018798828125, "logps/rejected": -209.05471801757812, "loss": 0.6539, "rewards/accuracies": 0.625, "rewards/chosen": -0.09502878785133362, "rewards/margins": 0.4424913227558136, "rewards/rejected": -0.5375201106071472, "step": 987 }, { "epoch": 0.11, "learning_rate": 2.699051855320145e-07, "logits/chosen": -2.6127400398254395, "logits/rejected": -2.5484299659729004, "logps/chosen": -254.2086944580078, "logps/rejected": -260.4604187011719, "loss": 0.4083, "rewards/accuracies": 0.875, "rewards/chosen": 0.1699618250131607, "rewards/margins": 1.173986792564392, "rewards/rejected": -1.0040249824523926, "step": 988 }, { "epoch": 0.11, "learning_rate": 2.6987006906239023e-07, "logits/chosen": -2.0360960960388184, "logits/rejected": -2.2439823150634766, "logps/chosen": -236.25289916992188, "logps/rejected": -241.26095581054688, "loss": 0.56, "rewards/accuracies": 0.75, "rewards/chosen": -0.17995098233222961, "rewards/margins": 0.8275477886199951, "rewards/rejected": -1.007498860359192, "step": 989 }, { "epoch": 0.11, "learning_rate": 2.69834952592766e-07, "logits/chosen": -3.196899890899658, "logits/rejected": -2.928715944290161, "logps/chosen": -234.94532775878906, "logps/rejected": -165.67864990234375, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -0.08156568557024002, "rewards/margins": 0.6188755631446838, "rewards/rejected": -0.7004412412643433, "step": 990 }, { "epoch": 0.11, "learning_rate": 2.6979983612314174e-07, "logits/chosen": -2.80786395072937, "logits/rejected": -2.6346147060394287, "logps/chosen": -308.9167175292969, "logps/rejected": -256.677734375, "loss": 0.593, "rewards/accuracies": 0.625, "rewards/chosen": 0.2223612666130066, "rewards/margins": 0.36026954650878906, "rewards/rejected": -0.13790826499462128, "step": 991 }, { "epoch": 0.11, "learning_rate": 2.697647196535175e-07, "logits/chosen": -3.226032257080078, "logits/rejected": -3.506516933441162, "logps/chosen": -162.94314575195312, "logps/rejected": -271.2950744628906, "loss": 0.5812, "rewards/accuracies": 0.875, "rewards/chosen": -0.27302369475364685, "rewards/margins": 0.9992998838424683, "rewards/rejected": -1.272323489189148, "step": 992 }, { "epoch": 0.11, "learning_rate": 2.697296031838932e-07, "logits/chosen": -2.8696129322052, "logits/rejected": -2.801812171936035, "logps/chosen": -224.36204528808594, "logps/rejected": -264.31707763671875, "loss": 0.4144, "rewards/accuracies": 0.875, "rewards/chosen": 0.21553891897201538, "rewards/margins": 0.9869098663330078, "rewards/rejected": -0.7713708877563477, "step": 993 }, { "epoch": 0.11, "learning_rate": 2.69694486714269e-07, "logits/chosen": -3.83064866065979, "logits/rejected": -3.643404245376587, "logps/chosen": -194.24085998535156, "logps/rejected": -172.23036193847656, "loss": 0.4398, "rewards/accuracies": 0.75, "rewards/chosen": -0.17081782221794128, "rewards/margins": 1.6466740369796753, "rewards/rejected": -1.8174920082092285, "step": 994 }, { "epoch": 0.11, "learning_rate": 2.6965937024464475e-07, "logits/chosen": -3.7284765243530273, "logits/rejected": -3.479773998260498, "logps/chosen": -241.43002319335938, "logps/rejected": -201.12896728515625, "loss": 0.4834, "rewards/accuracies": 0.875, "rewards/chosen": 0.09852510690689087, "rewards/margins": 1.2628109455108643, "rewards/rejected": -1.1642858982086182, "step": 995 }, { "epoch": 0.11, "learning_rate": 2.6962425377502045e-07, "logits/chosen": -3.3035807609558105, "logits/rejected": -3.288422107696533, "logps/chosen": -347.50457763671875, "logps/rejected": -234.35922241210938, "loss": 0.392, "rewards/accuracies": 1.0, "rewards/chosen": 0.39147573709487915, "rewards/margins": 1.139094591140747, "rewards/rejected": -0.7476187944412231, "step": 996 }, { "epoch": 0.11, "learning_rate": 2.695891373053962e-07, "logits/chosen": -2.641629934310913, "logits/rejected": -2.8399672508239746, "logps/chosen": -487.91217041015625, "logps/rejected": -400.4972839355469, "loss": 0.5717, "rewards/accuracies": 0.625, "rewards/chosen": 0.08460550010204315, "rewards/margins": 0.42655980587005615, "rewards/rejected": -0.3419543504714966, "step": 997 }, { "epoch": 0.12, "learning_rate": 2.6955402083577196e-07, "logits/chosen": -3.0048513412475586, "logits/rejected": -2.698700428009033, "logps/chosen": -161.59068298339844, "logps/rejected": -260.7747497558594, "loss": 0.4292, "rewards/accuracies": 0.75, "rewards/chosen": -0.04530215263366699, "rewards/margins": 0.8997824788093567, "rewards/rejected": -0.9450846314430237, "step": 998 }, { "epoch": 0.12, "learning_rate": 2.695189043661477e-07, "logits/chosen": -2.8768482208251953, "logits/rejected": -2.7550835609436035, "logps/chosen": -314.4839172363281, "logps/rejected": -257.24560546875, "loss": 0.5145, "rewards/accuracies": 0.875, "rewards/chosen": -0.544773280620575, "rewards/margins": 0.7183042168617249, "rewards/rejected": -1.2630774974822998, "step": 999 }, { "epoch": 0.12, "learning_rate": 2.6948378789652347e-07, "logits/chosen": -2.9549267292022705, "logits/rejected": -3.0565500259399414, "logps/chosen": -488.66455078125, "logps/rejected": -401.908203125, "loss": 0.2682, "rewards/accuracies": 0.875, "rewards/chosen": 0.47644034028053284, "rewards/margins": 1.7262794971466064, "rewards/rejected": -1.249839186668396, "step": 1000 }, { "epoch": 0.12, "eval_logits/chosen": -2.8467764854431152, "eval_logits/rejected": -2.8113934993743896, "eval_logps/chosen": -292.5613098144531, "eval_logps/rejected": -230.89024353027344, "eval_loss": 0.4792271852493286, "eval_rewards/accuracies": 0.7857142686843872, "eval_rewards/chosen": 0.14936694502830505, "eval_rewards/margins": 0.8041319847106934, "eval_rewards/rejected": -0.6547650694847107, "eval_runtime": 32.6014, "eval_samples_per_second": 2.147, "eval_steps_per_second": 1.074, "step": 1000 }, { "epoch": 0.12, "learning_rate": 2.6944867142689917e-07, "logits/chosen": -2.9890384674072266, "logits/rejected": -3.0015764236450195, "logps/chosen": -294.6145324707031, "logps/rejected": -180.47592163085938, "loss": 0.5646, "rewards/accuracies": 0.625, "rewards/chosen": -0.3045096695423126, "rewards/margins": 0.5944913625717163, "rewards/rejected": -0.8990010023117065, "step": 1001 }, { "epoch": 0.12, "learning_rate": 2.694135549572749e-07, "logits/chosen": -2.927506923675537, "logits/rejected": -3.082932710647583, "logps/chosen": -250.5115203857422, "logps/rejected": -169.50790405273438, "loss": 0.55, "rewards/accuracies": 0.625, "rewards/chosen": -0.08371113240718842, "rewards/margins": 0.6469983458518982, "rewards/rejected": -0.7307094931602478, "step": 1002 }, { "epoch": 0.12, "learning_rate": 2.6937843848765073e-07, "logits/chosen": -2.3061983585357666, "logits/rejected": -2.2522501945495605, "logps/chosen": -221.78208923339844, "logps/rejected": -228.43289184570312, "loss": 0.3965, "rewards/accuracies": 0.75, "rewards/chosen": -0.03845285251736641, "rewards/margins": 1.173314094543457, "rewards/rejected": -1.2117670774459839, "step": 1003 }, { "epoch": 0.12, "learning_rate": 2.6934332201802643e-07, "logits/chosen": -3.0025792121887207, "logits/rejected": -3.091940402984619, "logps/chosen": -211.87171936035156, "logps/rejected": -298.54644775390625, "loss": 0.4181, "rewards/accuracies": 0.875, "rewards/chosen": -0.08777110278606415, "rewards/margins": 1.828642725944519, "rewards/rejected": -1.9164137840270996, "step": 1004 }, { "epoch": 0.12, "learning_rate": 2.693082055484022e-07, "logits/chosen": -3.2955312728881836, "logits/rejected": -3.3115639686584473, "logps/chosen": -129.1331329345703, "logps/rejected": -108.62527465820312, "loss": 0.493, "rewards/accuracies": 0.875, "rewards/chosen": -0.21543221175670624, "rewards/margins": 0.5740291476249695, "rewards/rejected": -0.7894613742828369, "step": 1005 }, { "epoch": 0.12, "learning_rate": 2.6927308907877794e-07, "logits/chosen": -3.173074960708618, "logits/rejected": -3.3787930011749268, "logps/chosen": -190.12193298339844, "logps/rejected": -173.813720703125, "loss": 0.3139, "rewards/accuracies": 1.0, "rewards/chosen": 0.12221789360046387, "rewards/margins": 1.5886244773864746, "rewards/rejected": -1.4664065837860107, "step": 1006 }, { "epoch": 0.12, "learning_rate": 2.692379726091537e-07, "logits/chosen": -3.27113676071167, "logits/rejected": -3.5632402896881104, "logps/chosen": -244.30538940429688, "logps/rejected": -267.55548095703125, "loss": 0.3038, "rewards/accuracies": 1.0, "rewards/chosen": 0.22619611024856567, "rewards/margins": 1.4454288482666016, "rewards/rejected": -1.2192325592041016, "step": 1007 }, { "epoch": 0.12, "learning_rate": 2.6920285613952945e-07, "logits/chosen": -3.2622568607330322, "logits/rejected": -3.4214370250701904, "logps/chosen": -179.5801239013672, "logps/rejected": -128.70155334472656, "loss": 0.7735, "rewards/accuracies": 0.5, "rewards/chosen": -0.1666165292263031, "rewards/margins": -0.05037976801395416, "rewards/rejected": -0.11623678356409073, "step": 1008 }, { "epoch": 0.12, "learning_rate": 2.6916773966990515e-07, "logits/chosen": -3.065295696258545, "logits/rejected": -3.0291500091552734, "logps/chosen": -473.509765625, "logps/rejected": -407.18243408203125, "loss": 0.5594, "rewards/accuracies": 0.625, "rewards/chosen": 0.2686772346496582, "rewards/margins": 0.526474118232727, "rewards/rejected": -0.25779685378074646, "step": 1009 }, { "epoch": 0.12, "learning_rate": 2.691326232002809e-07, "logits/chosen": -3.5243821144104004, "logits/rejected": -3.3372323513031006, "logps/chosen": -464.8660888671875, "logps/rejected": -307.4770202636719, "loss": 0.3817, "rewards/accuracies": 0.875, "rewards/chosen": 0.44373583793640137, "rewards/margins": 1.2967848777770996, "rewards/rejected": -0.8530490398406982, "step": 1010 }, { "epoch": 0.12, "learning_rate": 2.6909750673065666e-07, "logits/chosen": -3.042860984802246, "logits/rejected": -3.187589168548584, "logps/chosen": -440.2353515625, "logps/rejected": -380.84912109375, "loss": 0.3126, "rewards/accuracies": 0.875, "rewards/chosen": 0.36759084463119507, "rewards/margins": 1.452439308166504, "rewards/rejected": -1.084848403930664, "step": 1011 }, { "epoch": 0.12, "learning_rate": 2.690623902610324e-07, "logits/chosen": -3.1054091453552246, "logits/rejected": -2.9565813541412354, "logps/chosen": -209.68051147460938, "logps/rejected": -263.39630126953125, "loss": 0.2324, "rewards/accuracies": 1.0, "rewards/chosen": 0.49350109696388245, "rewards/margins": 2.3182191848754883, "rewards/rejected": -1.8247181177139282, "step": 1012 }, { "epoch": 0.12, "learning_rate": 2.6902727379140816e-07, "logits/chosen": -2.4899420738220215, "logits/rejected": -2.744485855102539, "logps/chosen": -208.46820068359375, "logps/rejected": -210.41334533691406, "loss": 0.4245, "rewards/accuracies": 0.625, "rewards/chosen": -0.003124713897705078, "rewards/margins": 1.0875492095947266, "rewards/rejected": -1.090673804283142, "step": 1013 }, { "epoch": 0.12, "learning_rate": 2.689921573217839e-07, "logits/chosen": -3.222302198410034, "logits/rejected": -3.0362987518310547, "logps/chosen": -244.73927307128906, "logps/rejected": -309.02520751953125, "loss": 0.418, "rewards/accuracies": 0.75, "rewards/chosen": 0.005220465362071991, "rewards/margins": 1.5701682567596436, "rewards/rejected": -1.5649478435516357, "step": 1014 }, { "epoch": 0.12, "learning_rate": 2.6895704085215967e-07, "logits/chosen": -3.699026346206665, "logits/rejected": -3.897007703781128, "logps/chosen": -112.20047760009766, "logps/rejected": -253.962646484375, "loss": 0.3433, "rewards/accuracies": 0.875, "rewards/chosen": -0.005344323813915253, "rewards/margins": 1.6780694723129272, "rewards/rejected": -1.6834138631820679, "step": 1015 }, { "epoch": 0.12, "learning_rate": 2.689219243825354e-07, "logits/chosen": -2.9722886085510254, "logits/rejected": -3.029719591140747, "logps/chosen": -179.494384765625, "logps/rejected": -192.71896362304688, "loss": 0.4079, "rewards/accuracies": 1.0, "rewards/chosen": -0.2800194323062897, "rewards/margins": 0.7049951553344727, "rewards/rejected": -0.9850145578384399, "step": 1016 }, { "epoch": 0.12, "learning_rate": 2.688868079129111e-07, "logits/chosen": -2.727360248565674, "logits/rejected": -2.815967559814453, "logps/chosen": -235.6004180908203, "logps/rejected": -271.8351135253906, "loss": 0.3562, "rewards/accuracies": 0.75, "rewards/chosen": 0.2899309992790222, "rewards/margins": 1.3568127155303955, "rewards/rejected": -1.0668818950653076, "step": 1017 }, { "epoch": 0.12, "learning_rate": 2.688516914432869e-07, "logits/chosen": -3.189326524734497, "logits/rejected": -3.5165343284606934, "logps/chosen": -136.04640197753906, "logps/rejected": -152.27615356445312, "loss": 0.4583, "rewards/accuracies": 0.75, "rewards/chosen": 0.012683719396591187, "rewards/margins": 1.0235559940338135, "rewards/rejected": -1.0108723640441895, "step": 1018 }, { "epoch": 0.12, "learning_rate": 2.6881657497366263e-07, "logits/chosen": -3.616323471069336, "logits/rejected": -3.1945323944091797, "logps/chosen": -420.41229248046875, "logps/rejected": -269.4302978515625, "loss": 0.4729, "rewards/accuracies": 0.75, "rewards/chosen": -0.5952157974243164, "rewards/margins": 1.3824422359466553, "rewards/rejected": -1.9776577949523926, "step": 1019 }, { "epoch": 0.12, "learning_rate": 2.687814585040384e-07, "logits/chosen": -3.232095718383789, "logits/rejected": -3.3242034912109375, "logps/chosen": -202.30516052246094, "logps/rejected": -243.49832153320312, "loss": 0.4645, "rewards/accuracies": 0.625, "rewards/chosen": 0.0598047710955143, "rewards/margins": 0.7807599306106567, "rewards/rejected": -0.7209552526473999, "step": 1020 }, { "epoch": 0.12, "learning_rate": 2.6874634203441414e-07, "logits/chosen": -3.7904224395751953, "logits/rejected": -3.6266684532165527, "logps/chosen": -229.26454162597656, "logps/rejected": -261.8208312988281, "loss": 0.2976, "rewards/accuracies": 0.875, "rewards/chosen": 0.44723767042160034, "rewards/margins": 2.6612606048583984, "rewards/rejected": -2.2140228748321533, "step": 1021 }, { "epoch": 0.12, "learning_rate": 2.687112255647899e-07, "logits/chosen": -2.679919958114624, "logits/rejected": -2.5095417499542236, "logps/chosen": -245.14520263671875, "logps/rejected": -310.1107177734375, "loss": 0.3998, "rewards/accuracies": 0.75, "rewards/chosen": 0.31252458691596985, "rewards/margins": 1.1531708240509033, "rewards/rejected": -0.8406461477279663, "step": 1022 }, { "epoch": 0.12, "learning_rate": 2.686761090951656e-07, "logits/chosen": -3.5106751918792725, "logits/rejected": -3.803213119506836, "logps/chosen": -322.8358459472656, "logps/rejected": -402.86181640625, "loss": 0.255, "rewards/accuracies": 0.875, "rewards/chosen": 0.4084693193435669, "rewards/margins": 3.093571901321411, "rewards/rejected": -2.685102701187134, "step": 1023 }, { "epoch": 0.12, "learning_rate": 2.6864099262554135e-07, "logits/chosen": -3.5380992889404297, "logits/rejected": -3.8108534812927246, "logps/chosen": -249.86752319335938, "logps/rejected": -230.67649841308594, "loss": 0.2106, "rewards/accuracies": 0.875, "rewards/chosen": 0.3425680994987488, "rewards/margins": 1.9143743515014648, "rewards/rejected": -1.5718063116073608, "step": 1024 }, { "epoch": 0.12, "learning_rate": 2.686058761559171e-07, "logits/chosen": -3.6146469116210938, "logits/rejected": -3.470076084136963, "logps/chosen": -227.20889282226562, "logps/rejected": -285.7851867675781, "loss": 0.4603, "rewards/accuracies": 0.875, "rewards/chosen": -0.21230866014957428, "rewards/margins": 0.6457839608192444, "rewards/rejected": -0.8580926060676575, "step": 1025 }, { "epoch": 0.12, "learning_rate": 2.6857075968629286e-07, "logits/chosen": -3.1639404296875, "logits/rejected": -3.043987274169922, "logps/chosen": -370.7873229980469, "logps/rejected": -211.15228271484375, "loss": 0.2411, "rewards/accuracies": 1.0, "rewards/chosen": 0.2567855417728424, "rewards/margins": 1.548858642578125, "rewards/rejected": -1.2920732498168945, "step": 1026 }, { "epoch": 0.12, "learning_rate": 2.685356432166686e-07, "logits/chosen": -3.4740045070648193, "logits/rejected": -3.239774227142334, "logps/chosen": -228.8424072265625, "logps/rejected": -251.1282196044922, "loss": 0.5786, "rewards/accuracies": 0.5, "rewards/chosen": -0.5096825361251831, "rewards/margins": 0.7991372346878052, "rewards/rejected": -1.3088197708129883, "step": 1027 }, { "epoch": 0.12, "learning_rate": 2.6850052674704437e-07, "logits/chosen": -3.0004751682281494, "logits/rejected": -2.838207960128784, "logps/chosen": -319.01763916015625, "logps/rejected": -396.8262023925781, "loss": 0.4932, "rewards/accuracies": 0.75, "rewards/chosen": 0.34719160199165344, "rewards/margins": 0.6924643516540527, "rewards/rejected": -0.3452726900577545, "step": 1028 }, { "epoch": 0.12, "learning_rate": 2.684654102774201e-07, "logits/chosen": -4.0099639892578125, "logits/rejected": -3.4964346885681152, "logps/chosen": -192.5290069580078, "logps/rejected": -176.32797241210938, "loss": 0.6351, "rewards/accuracies": 0.75, "rewards/chosen": -0.8147139549255371, "rewards/margins": 0.3653281331062317, "rewards/rejected": -1.1800421476364136, "step": 1029 }, { "epoch": 0.12, "learning_rate": 2.6843029380779587e-07, "logits/chosen": -2.4619901180267334, "logits/rejected": -3.058591842651367, "logps/chosen": -288.5072021484375, "logps/rejected": -169.99966430664062, "loss": 0.3636, "rewards/accuracies": 0.75, "rewards/chosen": 0.3259803056716919, "rewards/margins": 1.6095463037490845, "rewards/rejected": -1.2835659980773926, "step": 1030 }, { "epoch": 0.12, "learning_rate": 2.683951773381716e-07, "logits/chosen": -3.937607526779175, "logits/rejected": -3.728919506072998, "logps/chosen": -200.14173889160156, "logps/rejected": -166.66603088378906, "loss": 0.3692, "rewards/accuracies": 0.875, "rewards/chosen": 0.12413468956947327, "rewards/margins": 1.0714726448059082, "rewards/rejected": -0.9473379254341125, "step": 1031 }, { "epoch": 0.12, "learning_rate": 2.6836006086854733e-07, "logits/chosen": -2.6367554664611816, "logits/rejected": -2.5294089317321777, "logps/chosen": -303.33203125, "logps/rejected": -362.5694274902344, "loss": 0.3346, "rewards/accuracies": 0.875, "rewards/chosen": -0.10377032309770584, "rewards/margins": 1.1775587797164917, "rewards/rejected": -1.2813290357589722, "step": 1032 }, { "epoch": 0.12, "learning_rate": 2.683249443989231e-07, "logits/chosen": -3.3861491680145264, "logits/rejected": -3.283461809158325, "logps/chosen": -314.738525390625, "logps/rejected": -295.21697998046875, "loss": 0.3442, "rewards/accuracies": 0.75, "rewards/chosen": -0.03976573050022125, "rewards/margins": 1.329036831855774, "rewards/rejected": -1.3688024282455444, "step": 1033 }, { "epoch": 0.12, "learning_rate": 2.6828982792929884e-07, "logits/chosen": -3.5183475017547607, "logits/rejected": -3.4384169578552246, "logps/chosen": -223.3145294189453, "logps/rejected": -269.67437744140625, "loss": 0.3638, "rewards/accuracies": 0.875, "rewards/chosen": 0.02993015944957733, "rewards/margins": 1.160941243171692, "rewards/rejected": -1.1310110092163086, "step": 1034 }, { "epoch": 0.12, "learning_rate": 2.682547114596746e-07, "logits/chosen": -2.67248797416687, "logits/rejected": -2.6747329235076904, "logps/chosen": -152.82984924316406, "logps/rejected": -178.09820556640625, "loss": 0.5968, "rewards/accuracies": 0.625, "rewards/chosen": 0.16354693472385406, "rewards/margins": 0.4074639081954956, "rewards/rejected": -0.24391698837280273, "step": 1035 }, { "epoch": 0.12, "learning_rate": 2.682195949900503e-07, "logits/chosen": -2.944822311401367, "logits/rejected": -2.8333382606506348, "logps/chosen": -282.808837890625, "logps/rejected": -256.447509765625, "loss": 0.8227, "rewards/accuracies": 0.75, "rewards/chosen": -0.12120199203491211, "rewards/margins": 0.14242622256278992, "rewards/rejected": -0.263628214597702, "step": 1036 }, { "epoch": 0.12, "learning_rate": 2.681844785204261e-07, "logits/chosen": -3.3827338218688965, "logits/rejected": -3.1289026737213135, "logps/chosen": -256.9677734375, "logps/rejected": -183.3800811767578, "loss": 0.4279, "rewards/accuracies": 0.75, "rewards/chosen": 0.23338547348976135, "rewards/margins": 1.2781083583831787, "rewards/rejected": -1.0447227954864502, "step": 1037 }, { "epoch": 0.12, "learning_rate": 2.681493620508018e-07, "logits/chosen": -3.1724696159362793, "logits/rejected": -3.282991409301758, "logps/chosen": -218.65682983398438, "logps/rejected": -216.284423828125, "loss": 0.4224, "rewards/accuracies": 0.75, "rewards/chosen": 0.07080493122339249, "rewards/margins": 1.2737340927124023, "rewards/rejected": -1.2029290199279785, "step": 1038 }, { "epoch": 0.12, "learning_rate": 2.6811424558117755e-07, "logits/chosen": -2.9321727752685547, "logits/rejected": -2.8633341789245605, "logps/chosen": -239.38882446289062, "logps/rejected": -242.52133178710938, "loss": 0.5855, "rewards/accuracies": 0.625, "rewards/chosen": 0.057997554540634155, "rewards/margins": 0.5173536539077759, "rewards/rejected": -0.4593561589717865, "step": 1039 }, { "epoch": 0.12, "learning_rate": 2.680791291115533e-07, "logits/chosen": -1.744834065437317, "logits/rejected": -1.8946303129196167, "logps/chosen": -364.2138671875, "logps/rejected": -296.4847106933594, "loss": 0.5446, "rewards/accuracies": 0.75, "rewards/chosen": 0.2992771863937378, "rewards/margins": 1.4173939228057861, "rewards/rejected": -1.1181166172027588, "step": 1040 }, { "epoch": 0.12, "learning_rate": 2.6804401264192906e-07, "logits/chosen": -2.891101121902466, "logits/rejected": -3.454953670501709, "logps/chosen": -253.30838012695312, "logps/rejected": -244.77305603027344, "loss": 0.434, "rewards/accuracies": 0.875, "rewards/chosen": 0.4544670879840851, "rewards/margins": 1.174118995666504, "rewards/rejected": -0.7196518778800964, "step": 1041 }, { "epoch": 0.12, "learning_rate": 2.680088961723048e-07, "logits/chosen": -2.6055498123168945, "logits/rejected": -2.8302998542785645, "logps/chosen": -289.50347900390625, "logps/rejected": -289.9058532714844, "loss": 0.409, "rewards/accuracies": 0.75, "rewards/chosen": -0.11054809391498566, "rewards/margins": 1.4239352941513062, "rewards/rejected": -1.5344834327697754, "step": 1042 }, { "epoch": 0.12, "learning_rate": 2.6797377970268057e-07, "logits/chosen": -3.259413719177246, "logits/rejected": -3.3293957710266113, "logps/chosen": -301.2493896484375, "logps/rejected": -260.6601257324219, "loss": 0.2578, "rewards/accuracies": 1.0, "rewards/chosen": 0.09799051284790039, "rewards/margins": 1.4851690530776978, "rewards/rejected": -1.3871784210205078, "step": 1043 }, { "epoch": 0.12, "learning_rate": 2.6793866323305627e-07, "logits/chosen": -2.489177703857422, "logits/rejected": -2.4383292198181152, "logps/chosen": -183.40313720703125, "logps/rejected": -242.26174926757812, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": 0.3571731746196747, "rewards/margins": 2.117004871368408, "rewards/rejected": -1.7598316669464111, "step": 1044 }, { "epoch": 0.12, "learning_rate": 2.67903546763432e-07, "logits/chosen": -3.4089417457580566, "logits/rejected": -3.052192211151123, "logps/chosen": -347.5773010253906, "logps/rejected": -349.1462707519531, "loss": 0.5503, "rewards/accuracies": 0.875, "rewards/chosen": 0.005301922559738159, "rewards/margins": 1.3491414785385132, "rewards/rejected": -1.3438396453857422, "step": 1045 }, { "epoch": 0.12, "learning_rate": 2.678684302938078e-07, "logits/chosen": -2.7101449966430664, "logits/rejected": -2.726557493209839, "logps/chosen": -291.880859375, "logps/rejected": -336.6107177734375, "loss": 0.2856, "rewards/accuracies": 0.875, "rewards/chosen": 0.0006092656403779984, "rewards/margins": 1.6222069263458252, "rewards/rejected": -1.6215977668762207, "step": 1046 }, { "epoch": 0.12, "learning_rate": 2.6783331382418353e-07, "logits/chosen": -3.1871161460876465, "logits/rejected": -3.429884910583496, "logps/chosen": -203.8602294921875, "logps/rejected": -251.33966064453125, "loss": 0.6293, "rewards/accuracies": 0.75, "rewards/chosen": -0.17574141919612885, "rewards/margins": 0.2031751573085785, "rewards/rejected": -0.37891656160354614, "step": 1047 }, { "epoch": 0.12, "learning_rate": 2.677981973545593e-07, "logits/chosen": -2.3977134227752686, "logits/rejected": -2.600922107696533, "logps/chosen": -208.11224365234375, "logps/rejected": -247.62356567382812, "loss": 0.2661, "rewards/accuracies": 1.0, "rewards/chosen": 0.014521032571792603, "rewards/margins": 1.5620085000991821, "rewards/rejected": -1.5474873781204224, "step": 1048 }, { "epoch": 0.12, "learning_rate": 2.6776308088493504e-07, "logits/chosen": -3.1489908695220947, "logits/rejected": -3.1833267211914062, "logps/chosen": -159.6882781982422, "logps/rejected": -157.5350341796875, "loss": 0.6725, "rewards/accuracies": 0.375, "rewards/chosen": 0.04417683556675911, "rewards/margins": 0.305560439825058, "rewards/rejected": -0.26138362288475037, "step": 1049 }, { "epoch": 0.12, "learning_rate": 2.677279644153108e-07, "logits/chosen": -2.7399721145629883, "logits/rejected": -2.56968355178833, "logps/chosen": -375.86492919921875, "logps/rejected": -251.56936645507812, "loss": 0.8015, "rewards/accuracies": 0.625, "rewards/chosen": -0.3396145701408386, "rewards/margins": 0.2890181541442871, "rewards/rejected": -0.628632664680481, "step": 1050 }, { "epoch": 0.12, "learning_rate": 2.6769284794568654e-07, "logits/chosen": -2.6026909351348877, "logits/rejected": -2.691554069519043, "logps/chosen": -326.17657470703125, "logps/rejected": -278.4223937988281, "loss": 0.474, "rewards/accuracies": 0.625, "rewards/chosen": 0.4548858404159546, "rewards/margins": 0.9007307291030884, "rewards/rejected": -0.445844829082489, "step": 1051 }, { "epoch": 0.12, "learning_rate": 2.6765773147606225e-07, "logits/chosen": -2.9153225421905518, "logits/rejected": -2.8656976222991943, "logps/chosen": -403.8117370605469, "logps/rejected": -272.4035949707031, "loss": 0.4506, "rewards/accuracies": 0.625, "rewards/chosen": -0.37804603576660156, "rewards/margins": 1.2277987003326416, "rewards/rejected": -1.6058447360992432, "step": 1052 }, { "epoch": 0.12, "learning_rate": 2.67622615006438e-07, "logits/chosen": -2.333549737930298, "logits/rejected": -2.3535423278808594, "logps/chosen": -356.178955078125, "logps/rejected": -322.5814514160156, "loss": 0.3391, "rewards/accuracies": 0.875, "rewards/chosen": 0.032656386494636536, "rewards/margins": 1.3281056880950928, "rewards/rejected": -1.2954492568969727, "step": 1053 }, { "epoch": 0.12, "learning_rate": 2.6758749853681375e-07, "logits/chosen": -3.3473165035247803, "logits/rejected": -3.547835350036621, "logps/chosen": -197.41567993164062, "logps/rejected": -238.34506225585938, "loss": 0.2488, "rewards/accuracies": 0.875, "rewards/chosen": 0.38756442070007324, "rewards/margins": 2.175715208053589, "rewards/rejected": -1.7881507873535156, "step": 1054 }, { "epoch": 0.12, "learning_rate": 2.675523820671895e-07, "logits/chosen": -3.1267170906066895, "logits/rejected": -3.072960615158081, "logps/chosen": -302.6055908203125, "logps/rejected": -336.59906005859375, "loss": 0.2842, "rewards/accuracies": 0.875, "rewards/chosen": 0.4374012053012848, "rewards/margins": 1.9779233932495117, "rewards/rejected": -1.5405220985412598, "step": 1055 }, { "epoch": 0.12, "learning_rate": 2.6751726559756526e-07, "logits/chosen": -3.085273027420044, "logits/rejected": -2.912855863571167, "logps/chosen": -208.47203063964844, "logps/rejected": -257.9039306640625, "loss": 0.4504, "rewards/accuracies": 1.0, "rewards/chosen": 0.15865278244018555, "rewards/margins": 0.742802083492279, "rewards/rejected": -0.5841493010520935, "step": 1056 }, { "epoch": 0.12, "learning_rate": 2.6748214912794096e-07, "logits/chosen": -3.421421527862549, "logits/rejected": -3.3457555770874023, "logps/chosen": -367.41168212890625, "logps/rejected": -197.7659454345703, "loss": 0.4109, "rewards/accuracies": 0.75, "rewards/chosen": -0.29268813133239746, "rewards/margins": 0.9864341020584106, "rewards/rejected": -1.2791221141815186, "step": 1057 }, { "epoch": 0.12, "learning_rate": 2.674470326583167e-07, "logits/chosen": -3.323819398880005, "logits/rejected": -3.4420759677886963, "logps/chosen": -286.00201416015625, "logps/rejected": -293.2824401855469, "loss": 0.5627, "rewards/accuracies": 0.75, "rewards/chosen": -0.4576173722743988, "rewards/margins": 0.4057440757751465, "rewards/rejected": -0.8633613586425781, "step": 1058 }, { "epoch": 0.12, "learning_rate": 2.674119161886925e-07, "logits/chosen": -2.7999916076660156, "logits/rejected": -2.7776260375976562, "logps/chosen": -239.51145935058594, "logps/rejected": -280.16815185546875, "loss": 0.3613, "rewards/accuracies": 0.75, "rewards/chosen": 0.2060440629720688, "rewards/margins": 1.3570518493652344, "rewards/rejected": -1.151007890701294, "step": 1059 }, { "epoch": 0.12, "learning_rate": 2.673767997190682e-07, "logits/chosen": -2.5541467666625977, "logits/rejected": -2.443340539932251, "logps/chosen": -136.96914672851562, "logps/rejected": -220.15000915527344, "loss": 0.4591, "rewards/accuracies": 0.75, "rewards/chosen": 0.1561250239610672, "rewards/margins": 0.828513503074646, "rewards/rejected": -0.6723884344100952, "step": 1060 }, { "epoch": 0.12, "learning_rate": 2.67341683249444e-07, "logits/chosen": -2.7907042503356934, "logits/rejected": -2.606461524963379, "logps/chosen": -549.8987426757812, "logps/rejected": -331.3394470214844, "loss": 0.4905, "rewards/accuracies": 0.875, "rewards/chosen": -0.4911750853061676, "rewards/margins": 0.9159469604492188, "rewards/rejected": -1.4071221351623535, "step": 1061 }, { "epoch": 0.12, "learning_rate": 2.6730656677981973e-07, "logits/chosen": -3.384805679321289, "logits/rejected": -3.3100056648254395, "logps/chosen": -265.1316223144531, "logps/rejected": -303.02789306640625, "loss": 0.7555, "rewards/accuracies": 0.625, "rewards/chosen": -0.18150471150875092, "rewards/margins": 0.9252256155014038, "rewards/rejected": -1.1067302227020264, "step": 1062 }, { "epoch": 0.12, "learning_rate": 2.672714503101955e-07, "logits/chosen": -3.2793126106262207, "logits/rejected": -3.299539566040039, "logps/chosen": -416.5001525878906, "logps/rejected": -247.2704620361328, "loss": 0.5255, "rewards/accuracies": 0.75, "rewards/chosen": 0.14084531366825104, "rewards/margins": 0.811980128288269, "rewards/rejected": -0.6711347699165344, "step": 1063 }, { "epoch": 0.12, "learning_rate": 2.6723633384057124e-07, "logits/chosen": -2.9731521606445312, "logits/rejected": -3.0756006240844727, "logps/chosen": -182.33493041992188, "logps/rejected": -201.4945831298828, "loss": 0.4503, "rewards/accuracies": 0.875, "rewards/chosen": -0.186550110578537, "rewards/margins": 0.8320742249488831, "rewards/rejected": -1.0186243057250977, "step": 1064 }, { "epoch": 0.12, "learning_rate": 2.6720121737094694e-07, "logits/chosen": -2.324869394302368, "logits/rejected": -2.5463755130767822, "logps/chosen": -211.65577697753906, "logps/rejected": -219.51858520507812, "loss": 0.5382, "rewards/accuracies": 0.625, "rewards/chosen": -0.16899175941944122, "rewards/margins": 0.8738133907318115, "rewards/rejected": -1.0428051948547363, "step": 1065 }, { "epoch": 0.12, "learning_rate": 2.671661009013227e-07, "logits/chosen": -2.910780906677246, "logits/rejected": -2.7264161109924316, "logps/chosen": -341.3624267578125, "logps/rejected": -404.66522216796875, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": 0.10699562728404999, "rewards/margins": 1.8566417694091797, "rewards/rejected": -1.7496461868286133, "step": 1066 }, { "epoch": 0.12, "learning_rate": 2.6713098443169845e-07, "logits/chosen": -3.347903251647949, "logits/rejected": -3.618755578994751, "logps/chosen": -272.55792236328125, "logps/rejected": -255.17843627929688, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": 0.20856042206287384, "rewards/margins": 2.3251523971557617, "rewards/rejected": -2.116591691970825, "step": 1067 }, { "epoch": 0.12, "learning_rate": 2.670958679620742e-07, "logits/chosen": -3.202124834060669, "logits/rejected": -3.703721046447754, "logps/chosen": -206.53793334960938, "logps/rejected": -279.0238037109375, "loss": 0.4358, "rewards/accuracies": 0.75, "rewards/chosen": 0.09561172127723694, "rewards/margins": 1.4642515182495117, "rewards/rejected": -1.3686398267745972, "step": 1068 }, { "epoch": 0.12, "learning_rate": 2.6706075149244996e-07, "logits/chosen": -2.863401174545288, "logits/rejected": -3.3243112564086914, "logps/chosen": -258.0196228027344, "logps/rejected": -268.0166015625, "loss": 0.5218, "rewards/accuracies": 0.75, "rewards/chosen": 0.04101572930812836, "rewards/margins": 1.4315996170043945, "rewards/rejected": -1.3905837535858154, "step": 1069 }, { "epoch": 0.12, "learning_rate": 2.6702563502282566e-07, "logits/chosen": -3.385283946990967, "logits/rejected": -3.264040946960449, "logps/chosen": -231.66885375976562, "logps/rejected": -324.6797180175781, "loss": 0.532, "rewards/accuracies": 0.625, "rewards/chosen": -0.3443859815597534, "rewards/margins": 0.9833326935768127, "rewards/rejected": -1.327718734741211, "step": 1070 }, { "epoch": 0.12, "learning_rate": 2.6699051855320146e-07, "logits/chosen": -2.8875722885131836, "logits/rejected": -2.58618426322937, "logps/chosen": -527.53076171875, "logps/rejected": -305.5352783203125, "loss": 0.2139, "rewards/accuracies": 1.0, "rewards/chosen": 0.61677485704422, "rewards/margins": 2.2003703117370605, "rewards/rejected": -1.583595633506775, "step": 1071 }, { "epoch": 0.12, "learning_rate": 2.669554020835772e-07, "logits/chosen": -3.650496244430542, "logits/rejected": -3.6738176345825195, "logps/chosen": -400.08001708984375, "logps/rejected": -283.344970703125, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": 0.046713605523109436, "rewards/margins": 1.6101597547531128, "rewards/rejected": -1.563446283340454, "step": 1072 }, { "epoch": 0.12, "learning_rate": 2.669202856139529e-07, "logits/chosen": -2.981619119644165, "logits/rejected": -2.9584925174713135, "logps/chosen": -412.3600158691406, "logps/rejected": -236.72940063476562, "loss": 0.4286, "rewards/accuracies": 0.875, "rewards/chosen": 0.5699102878570557, "rewards/margins": 0.8904027938842773, "rewards/rejected": -0.3204925060272217, "step": 1073 }, { "epoch": 0.12, "learning_rate": 2.6688516914432867e-07, "logits/chosen": -2.1321170330047607, "logits/rejected": -2.559342622756958, "logps/chosen": -342.568603515625, "logps/rejected": -226.67481994628906, "loss": 0.5178, "rewards/accuracies": 0.75, "rewards/chosen": 0.030689842998981476, "rewards/margins": 1.3157991170883179, "rewards/rejected": -1.2851094007492065, "step": 1074 }, { "epoch": 0.12, "learning_rate": 2.668500526747044e-07, "logits/chosen": -2.3557770252227783, "logits/rejected": -2.2170639038085938, "logps/chosen": -384.9610595703125, "logps/rejected": -368.3170471191406, "loss": 0.4829, "rewards/accuracies": 0.75, "rewards/chosen": -0.11112399399280548, "rewards/margins": 0.7109432816505432, "rewards/rejected": -0.8220672607421875, "step": 1075 }, { "epoch": 0.12, "learning_rate": 2.668149362050802e-07, "logits/chosen": -3.3153393268585205, "logits/rejected": -3.2915453910827637, "logps/chosen": -93.95964050292969, "logps/rejected": -157.31948852539062, "loss": 0.5637, "rewards/accuracies": 0.75, "rewards/chosen": -0.3066788911819458, "rewards/margins": 0.5229943990707397, "rewards/rejected": -0.8296732902526855, "step": 1076 }, { "epoch": 0.12, "learning_rate": 2.6677981973545593e-07, "logits/chosen": -2.7158753871917725, "logits/rejected": -2.593374729156494, "logps/chosen": -316.8660888671875, "logps/rejected": -325.7777099609375, "loss": 0.561, "rewards/accuracies": 0.625, "rewards/chosen": -0.7072499394416809, "rewards/margins": 0.40227651596069336, "rewards/rejected": -1.109526515007019, "step": 1077 }, { "epoch": 0.12, "learning_rate": 2.6674470326583163e-07, "logits/chosen": -3.1633102893829346, "logits/rejected": -3.722296714782715, "logps/chosen": -301.582275390625, "logps/rejected": -209.6153564453125, "loss": 0.6795, "rewards/accuracies": 0.5, "rewards/chosen": 0.14551995694637299, "rewards/margins": 0.6555773019790649, "rewards/rejected": -0.5100574493408203, "step": 1078 }, { "epoch": 0.12, "learning_rate": 2.667095867962074e-07, "logits/chosen": -3.211073398590088, "logits/rejected": -2.792351484298706, "logps/chosen": -423.162353515625, "logps/rejected": -229.6708984375, "loss": 0.3373, "rewards/accuracies": 0.75, "rewards/chosen": 0.5055925250053406, "rewards/margins": 2.000922679901123, "rewards/rejected": -1.4953300952911377, "step": 1079 }, { "epoch": 0.12, "learning_rate": 2.6667447032658314e-07, "logits/chosen": -2.6621952056884766, "logits/rejected": -2.622739315032959, "logps/chosen": -207.3436737060547, "logps/rejected": -267.5551452636719, "loss": 0.3238, "rewards/accuracies": 1.0, "rewards/chosen": -0.07570178806781769, "rewards/margins": 2.126349687576294, "rewards/rejected": -2.2020514011383057, "step": 1080 }, { "epoch": 0.12, "learning_rate": 2.666393538569589e-07, "logits/chosen": -3.1314122676849365, "logits/rejected": -3.0342020988464355, "logps/chosen": -131.2550048828125, "logps/rejected": -276.4896545410156, "loss": 0.6655, "rewards/accuracies": 0.75, "rewards/chosen": -0.20077301561832428, "rewards/margins": 0.6015779376029968, "rewards/rejected": -0.8023509383201599, "step": 1081 }, { "epoch": 0.12, "learning_rate": 2.6660423738733465e-07, "logits/chosen": -3.6032538414001465, "logits/rejected": -3.600999355316162, "logps/chosen": -245.648681640625, "logps/rejected": -175.40623474121094, "loss": 0.3843, "rewards/accuracies": 0.75, "rewards/chosen": 0.3788723051548004, "rewards/margins": 1.3567819595336914, "rewards/rejected": -0.9779095649719238, "step": 1082 }, { "epoch": 0.12, "learning_rate": 2.665691209177104e-07, "logits/chosen": -2.820107936859131, "logits/rejected": -2.7412166595458984, "logps/chosen": -329.960693359375, "logps/rejected": -177.3825225830078, "loss": 0.3421, "rewards/accuracies": 0.875, "rewards/chosen": 0.23718053102493286, "rewards/margins": 1.4442846775054932, "rewards/rejected": -1.2071040868759155, "step": 1083 }, { "epoch": 0.12, "learning_rate": 2.6653400444808616e-07, "logits/chosen": -2.8122293949127197, "logits/rejected": -2.8643693923950195, "logps/chosen": -160.08087158203125, "logps/rejected": -227.11929321289062, "loss": 0.7896, "rewards/accuracies": 0.375, "rewards/chosen": -0.5910912752151489, "rewards/margins": 0.23349127173423767, "rewards/rejected": -0.824582576751709, "step": 1084 }, { "epoch": 0.13, "learning_rate": 2.664988879784619e-07, "logits/chosen": -2.9615302085876465, "logits/rejected": -2.979785919189453, "logps/chosen": -348.69891357421875, "logps/rejected": -274.6491394042969, "loss": 0.4069, "rewards/accuracies": 0.75, "rewards/chosen": 0.18154031038284302, "rewards/margins": 1.5098885297775269, "rewards/rejected": -1.3283482789993286, "step": 1085 }, { "epoch": 0.13, "learning_rate": 2.664637715088376e-07, "logits/chosen": -3.1836366653442383, "logits/rejected": -3.4585728645324707, "logps/chosen": -244.2884979248047, "logps/rejected": -358.07452392578125, "loss": 0.4327, "rewards/accuracies": 0.625, "rewards/chosen": -0.034999266266822815, "rewards/margins": 1.1542115211486816, "rewards/rejected": -1.1892108917236328, "step": 1086 }, { "epoch": 0.13, "learning_rate": 2.6642865503921337e-07, "logits/chosen": -3.0731682777404785, "logits/rejected": -2.764840602874756, "logps/chosen": -336.82794189453125, "logps/rejected": -254.755859375, "loss": 0.7336, "rewards/accuracies": 0.875, "rewards/chosen": -0.5028422474861145, "rewards/margins": 0.6855195760726929, "rewards/rejected": -1.1883617639541626, "step": 1087 }, { "epoch": 0.13, "learning_rate": 2.663935385695891e-07, "logits/chosen": -3.5641391277313232, "logits/rejected": -3.512181282043457, "logps/chosen": -220.45806884765625, "logps/rejected": -265.07550048828125, "loss": 0.7004, "rewards/accuracies": 0.625, "rewards/chosen": -0.3117159605026245, "rewards/margins": 0.05495902895927429, "rewards/rejected": -0.3666749596595764, "step": 1088 }, { "epoch": 0.13, "learning_rate": 2.663584220999649e-07, "logits/chosen": -3.6256418228149414, "logits/rejected": -3.539555311203003, "logps/chosen": -264.65911865234375, "logps/rejected": -226.02462768554688, "loss": 0.4815, "rewards/accuracies": 0.75, "rewards/chosen": -0.21201497316360474, "rewards/margins": 1.1389046907424927, "rewards/rejected": -1.3509197235107422, "step": 1089 }, { "epoch": 0.13, "learning_rate": 2.6632330563034063e-07, "logits/chosen": -4.0398406982421875, "logits/rejected": -3.7346415519714355, "logps/chosen": -213.818359375, "logps/rejected": -209.65670776367188, "loss": 0.4201, "rewards/accuracies": 0.875, "rewards/chosen": -0.07279305160045624, "rewards/margins": 1.0541012287139893, "rewards/rejected": -1.126894235610962, "step": 1090 }, { "epoch": 0.13, "learning_rate": 2.6628818916071633e-07, "logits/chosen": -3.548835039138794, "logits/rejected": -3.646570920944214, "logps/chosen": -66.86070251464844, "logps/rejected": -151.00665283203125, "loss": 0.477, "rewards/accuracies": 0.75, "rewards/chosen": 0.031580112874507904, "rewards/margins": 1.167687177658081, "rewards/rejected": -1.1361072063446045, "step": 1091 }, { "epoch": 0.13, "learning_rate": 2.662530726910921e-07, "logits/chosen": -2.8751325607299805, "logits/rejected": -2.998018264770508, "logps/chosen": -367.55194091796875, "logps/rejected": -322.14599609375, "loss": 0.4341, "rewards/accuracies": 0.75, "rewards/chosen": -0.14278297126293182, "rewards/margins": 0.9159032106399536, "rewards/rejected": -1.0586862564086914, "step": 1092 }, { "epoch": 0.13, "learning_rate": 2.662179562214679e-07, "logits/chosen": -2.8614587783813477, "logits/rejected": -2.7682926654815674, "logps/chosen": -227.6217803955078, "logps/rejected": -198.5792694091797, "loss": 0.4673, "rewards/accuracies": 0.625, "rewards/chosen": 0.0648302361369133, "rewards/margins": 1.257708191871643, "rewards/rejected": -1.1928777694702148, "step": 1093 }, { "epoch": 0.13, "learning_rate": 2.661828397518436e-07, "logits/chosen": -4.009253978729248, "logits/rejected": -3.6083920001983643, "logps/chosen": -340.5731201171875, "logps/rejected": -225.06423950195312, "loss": 0.3959, "rewards/accuracies": 0.875, "rewards/chosen": 0.17818161845207214, "rewards/margins": 1.3058801889419556, "rewards/rejected": -1.127698540687561, "step": 1094 }, { "epoch": 0.13, "learning_rate": 2.6614772328221934e-07, "logits/chosen": -3.0888280868530273, "logits/rejected": -4.110617160797119, "logps/chosen": -113.39640808105469, "logps/rejected": -321.6815185546875, "loss": 0.4615, "rewards/accuracies": 0.75, "rewards/chosen": -0.0016039982438087463, "rewards/margins": 1.353739619255066, "rewards/rejected": -1.3553435802459717, "step": 1095 }, { "epoch": 0.13, "learning_rate": 2.661126068125951e-07, "logits/chosen": -2.986386775970459, "logits/rejected": -3.0536322593688965, "logps/chosen": -253.0177001953125, "logps/rejected": -215.14727783203125, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": 0.2331954538822174, "rewards/margins": 1.0443871021270752, "rewards/rejected": -0.8111915588378906, "step": 1096 }, { "epoch": 0.13, "learning_rate": 2.6607749034297085e-07, "logits/chosen": -2.886111259460449, "logits/rejected": -2.669189691543579, "logps/chosen": -281.52105712890625, "logps/rejected": -261.98297119140625, "loss": 0.4427, "rewards/accuracies": 0.875, "rewards/chosen": -0.16390664875507355, "rewards/margins": 1.2526737451553345, "rewards/rejected": -1.4165804386138916, "step": 1097 }, { "epoch": 0.13, "learning_rate": 2.660423738733466e-07, "logits/chosen": -3.1427114009857178, "logits/rejected": -3.1371700763702393, "logps/chosen": -134.64695739746094, "logps/rejected": -276.44525146484375, "loss": 0.4604, "rewards/accuracies": 0.625, "rewards/chosen": -0.1724531352519989, "rewards/margins": 1.3602094650268555, "rewards/rejected": -1.5326625108718872, "step": 1098 }, { "epoch": 0.13, "learning_rate": 2.660072574037223e-07, "logits/chosen": -3.1629531383514404, "logits/rejected": -3.320312976837158, "logps/chosen": -305.5906982421875, "logps/rejected": -248.65670776367188, "loss": 0.307, "rewards/accuracies": 0.75, "rewards/chosen": -0.395014226436615, "rewards/margins": 1.9799516201019287, "rewards/rejected": -2.3749659061431885, "step": 1099 }, { "epoch": 0.13, "learning_rate": 2.6597214093409806e-07, "logits/chosen": -3.239506244659424, "logits/rejected": -3.213106155395508, "logps/chosen": -154.67005920410156, "logps/rejected": -191.02911376953125, "loss": 0.6085, "rewards/accuracies": 0.5, "rewards/chosen": -0.2719899117946625, "rewards/margins": 0.33840304613113403, "rewards/rejected": -0.6103929877281189, "step": 1100 }, { "epoch": 0.13, "learning_rate": 2.659370244644738e-07, "logits/chosen": -3.1850335597991943, "logits/rejected": -3.552513599395752, "logps/chosen": -136.6389923095703, "logps/rejected": -201.15696716308594, "loss": 0.4944, "rewards/accuracies": 0.875, "rewards/chosen": -0.1187017560005188, "rewards/margins": 0.5159597396850586, "rewards/rejected": -0.6346614956855774, "step": 1101 }, { "epoch": 0.13, "learning_rate": 2.6590190799484957e-07, "logits/chosen": -2.8145833015441895, "logits/rejected": -2.9616990089416504, "logps/chosen": -306.9453125, "logps/rejected": -241.09246826171875, "loss": 0.4184, "rewards/accuracies": 0.75, "rewards/chosen": 0.055389538407325745, "rewards/margins": 0.9024931192398071, "rewards/rejected": -0.8471035957336426, "step": 1102 }, { "epoch": 0.13, "learning_rate": 2.658667915252253e-07, "logits/chosen": -3.4070816040039062, "logits/rejected": -3.544999361038208, "logps/chosen": -230.68817138671875, "logps/rejected": -215.06195068359375, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": 0.21901550889015198, "rewards/margins": 2.616081476211548, "rewards/rejected": -2.3970658779144287, "step": 1103 }, { "epoch": 0.13, "learning_rate": 2.658316750556011e-07, "logits/chosen": -3.8440022468566895, "logits/rejected": -3.4502382278442383, "logps/chosen": -208.4144287109375, "logps/rejected": -165.04470825195312, "loss": 0.3298, "rewards/accuracies": 1.0, "rewards/chosen": 0.1886543333530426, "rewards/margins": 1.1106518507003784, "rewards/rejected": -0.9219975471496582, "step": 1104 }, { "epoch": 0.13, "learning_rate": 2.6579655858597683e-07, "logits/chosen": -2.5575389862060547, "logits/rejected": -2.8185529708862305, "logps/chosen": -279.93829345703125, "logps/rejected": -322.7536926269531, "loss": 0.1733, "rewards/accuracies": 1.0, "rewards/chosen": 0.5039827823638916, "rewards/margins": 2.1893160343170166, "rewards/rejected": -1.6853331327438354, "step": 1105 }, { "epoch": 0.13, "learning_rate": 2.657614421163526e-07, "logits/chosen": -3.8403432369232178, "logits/rejected": -3.906801462173462, "logps/chosen": -230.33114624023438, "logps/rejected": -251.86447143554688, "loss": 0.4141, "rewards/accuracies": 0.875, "rewards/chosen": -0.013188734650611877, "rewards/margins": 1.3897064924240112, "rewards/rejected": -1.402895212173462, "step": 1106 }, { "epoch": 0.13, "learning_rate": 2.657263256467283e-07, "logits/chosen": -2.2037038803100586, "logits/rejected": -2.3885562419891357, "logps/chosen": -231.07699584960938, "logps/rejected": -227.6617431640625, "loss": 0.5779, "rewards/accuracies": 0.625, "rewards/chosen": 0.038130901753902435, "rewards/margins": 0.7168096899986267, "rewards/rejected": -0.6786787509918213, "step": 1107 }, { "epoch": 0.13, "learning_rate": 2.6569120917710404e-07, "logits/chosen": -3.2889950275421143, "logits/rejected": -3.2071967124938965, "logps/chosen": -124.60722351074219, "logps/rejected": -165.28273010253906, "loss": 0.3187, "rewards/accuracies": 1.0, "rewards/chosen": -0.13064639270305634, "rewards/margins": 1.4673404693603516, "rewards/rejected": -1.5979869365692139, "step": 1108 }, { "epoch": 0.13, "learning_rate": 2.656560927074798e-07, "logits/chosen": -2.0073370933532715, "logits/rejected": -2.2956976890563965, "logps/chosen": -300.39739990234375, "logps/rejected": -172.28770446777344, "loss": 0.4172, "rewards/accuracies": 0.875, "rewards/chosen": 0.22266235947608948, "rewards/margins": 0.9134190082550049, "rewards/rejected": -0.6907566785812378, "step": 1109 }, { "epoch": 0.13, "learning_rate": 2.6562097623785555e-07, "logits/chosen": -3.15394926071167, "logits/rejected": -2.9639358520507812, "logps/chosen": -376.3689270019531, "logps/rejected": -422.06671142578125, "loss": 0.3243, "rewards/accuracies": 1.0, "rewards/chosen": -0.3569018542766571, "rewards/margins": 1.726264238357544, "rewards/rejected": -2.0831661224365234, "step": 1110 }, { "epoch": 0.13, "learning_rate": 2.655858597682313e-07, "logits/chosen": -3.0604772567749023, "logits/rejected": -3.125676393508911, "logps/chosen": -159.4078369140625, "logps/rejected": -174.72250366210938, "loss": 0.494, "rewards/accuracies": 0.875, "rewards/chosen": 0.1539372205734253, "rewards/margins": 1.3404419422149658, "rewards/rejected": -1.186504602432251, "step": 1111 }, { "epoch": 0.13, "learning_rate": 2.6555074329860705e-07, "logits/chosen": -3.1175551414489746, "logits/rejected": -3.2056047916412354, "logps/chosen": -431.451171875, "logps/rejected": -279.39337158203125, "loss": 0.2793, "rewards/accuracies": 0.875, "rewards/chosen": 0.12195511907339096, "rewards/margins": 1.8242889642715454, "rewards/rejected": -1.7023338079452515, "step": 1112 }, { "epoch": 0.13, "learning_rate": 2.6551562682898275e-07, "logits/chosen": -3.4403445720672607, "logits/rejected": -2.997777223587036, "logps/chosen": -290.8890075683594, "logps/rejected": -202.0396270751953, "loss": 0.4725, "rewards/accuracies": 0.875, "rewards/chosen": 0.09080486744642258, "rewards/margins": 0.906679630279541, "rewards/rejected": -0.8158748149871826, "step": 1113 }, { "epoch": 0.13, "learning_rate": 2.654805103593585e-07, "logits/chosen": -3.6513216495513916, "logits/rejected": -3.675164222717285, "logps/chosen": -189.81710815429688, "logps/rejected": -122.24368286132812, "loss": 0.3833, "rewards/accuracies": 0.875, "rewards/chosen": 0.34641045331954956, "rewards/margins": 1.2991657257080078, "rewards/rejected": -0.9527552127838135, "step": 1114 }, { "epoch": 0.13, "learning_rate": 2.6544539388973426e-07, "logits/chosen": -3.4919981956481934, "logits/rejected": -3.3947594165802, "logps/chosen": -293.4674072265625, "logps/rejected": -342.3107604980469, "loss": 0.7052, "rewards/accuracies": 0.625, "rewards/chosen": -0.5479456186294556, "rewards/margins": 0.49130839109420776, "rewards/rejected": -1.0392539501190186, "step": 1115 }, { "epoch": 0.13, "learning_rate": 2.6541027742011e-07, "logits/chosen": -3.504812717437744, "logits/rejected": -3.1602911949157715, "logps/chosen": -163.70880126953125, "logps/rejected": -177.30487060546875, "loss": 0.5358, "rewards/accuracies": 0.75, "rewards/chosen": -0.13161659240722656, "rewards/margins": 1.2275633811950684, "rewards/rejected": -1.3591800928115845, "step": 1116 }, { "epoch": 0.13, "learning_rate": 2.6537516095048577e-07, "logits/chosen": -3.481541156768799, "logits/rejected": -3.0874714851379395, "logps/chosen": -221.72964477539062, "logps/rejected": -269.4653015136719, "loss": 0.3156, "rewards/accuracies": 1.0, "rewards/chosen": -0.1969405710697174, "rewards/margins": 1.3327747583389282, "rewards/rejected": -1.5297152996063232, "step": 1117 }, { "epoch": 0.13, "learning_rate": 2.653400444808615e-07, "logits/chosen": -3.1617136001586914, "logits/rejected": -3.3838589191436768, "logps/chosen": -221.19395446777344, "logps/rejected": -188.6819305419922, "loss": 0.5127, "rewards/accuracies": 0.875, "rewards/chosen": -0.3910120725631714, "rewards/margins": 0.7642046213150024, "rewards/rejected": -1.1552165746688843, "step": 1118 }, { "epoch": 0.13, "learning_rate": 2.653049280112373e-07, "logits/chosen": -3.4359350204467773, "logits/rejected": -3.5465497970581055, "logps/chosen": -286.7685852050781, "logps/rejected": -167.11415100097656, "loss": 0.384, "rewards/accuracies": 0.75, "rewards/chosen": 0.14442375302314758, "rewards/margins": 1.4920531511306763, "rewards/rejected": -1.3476293087005615, "step": 1119 }, { "epoch": 0.13, "learning_rate": 2.6526981154161303e-07, "logits/chosen": -3.0433547496795654, "logits/rejected": -3.186009168624878, "logps/chosen": -403.11566162109375, "logps/rejected": -269.0381164550781, "loss": 0.4046, "rewards/accuracies": 1.0, "rewards/chosen": 0.42417412996292114, "rewards/margins": 0.8064361810684204, "rewards/rejected": -0.38226208090782166, "step": 1120 }, { "epoch": 0.13, "learning_rate": 2.6523469507198873e-07, "logits/chosen": -3.0856523513793945, "logits/rejected": -3.4052882194519043, "logps/chosen": -220.1566925048828, "logps/rejected": -276.8841552734375, "loss": 0.1869, "rewards/accuracies": 1.0, "rewards/chosen": 0.6537335515022278, "rewards/margins": 1.9068706035614014, "rewards/rejected": -1.2531368732452393, "step": 1121 }, { "epoch": 0.13, "learning_rate": 2.651995786023645e-07, "logits/chosen": -3.0048563480377197, "logits/rejected": -2.9592180252075195, "logps/chosen": -355.81793212890625, "logps/rejected": -223.06158447265625, "loss": 0.6845, "rewards/accuracies": 0.75, "rewards/chosen": -0.4326724410057068, "rewards/margins": 0.44764286279678345, "rewards/rejected": -0.8803153038024902, "step": 1122 }, { "epoch": 0.13, "learning_rate": 2.6516446213274024e-07, "logits/chosen": -2.7078959941864014, "logits/rejected": -2.700967788696289, "logps/chosen": -294.5287170410156, "logps/rejected": -214.2689666748047, "loss": 0.3838, "rewards/accuracies": 0.875, "rewards/chosen": -0.1467777043581009, "rewards/margins": 0.9895541667938232, "rewards/rejected": -1.1363317966461182, "step": 1123 }, { "epoch": 0.13, "learning_rate": 2.65129345663116e-07, "logits/chosen": -3.276919364929199, "logits/rejected": -3.485095739364624, "logps/chosen": -152.3216552734375, "logps/rejected": -181.8409423828125, "loss": 0.2962, "rewards/accuracies": 1.0, "rewards/chosen": 0.1355457752943039, "rewards/margins": 1.6287325620651245, "rewards/rejected": -1.4931867122650146, "step": 1124 }, { "epoch": 0.13, "learning_rate": 2.6509422919349175e-07, "logits/chosen": -2.919196605682373, "logits/rejected": -2.790947675704956, "logps/chosen": -538.43994140625, "logps/rejected": -286.8423767089844, "loss": 0.8325, "rewards/accuracies": 0.625, "rewards/chosen": -0.058187201619148254, "rewards/margins": 0.30952396988868713, "rewards/rejected": -0.36771121621131897, "step": 1125 }, { "epoch": 0.13, "learning_rate": 2.6505911272386745e-07, "logits/chosen": -2.921529769897461, "logits/rejected": -3.069629669189453, "logps/chosen": -388.2000427246094, "logps/rejected": -261.0193786621094, "loss": 0.5618, "rewards/accuracies": 0.625, "rewards/chosen": 0.41262391209602356, "rewards/margins": 0.421667218208313, "rewards/rejected": -0.009043306112289429, "step": 1126 }, { "epoch": 0.13, "learning_rate": 2.6502399625424326e-07, "logits/chosen": -2.9504518508911133, "logits/rejected": -3.107922315597534, "logps/chosen": -121.18948364257812, "logps/rejected": -274.86407470703125, "loss": 0.4565, "rewards/accuracies": 0.875, "rewards/chosen": 0.46845734119415283, "rewards/margins": 1.2508995532989502, "rewards/rejected": -0.7824422121047974, "step": 1127 }, { "epoch": 0.13, "learning_rate": 2.64988879784619e-07, "logits/chosen": -3.111607789993286, "logits/rejected": -3.314397096633911, "logps/chosen": -153.505615234375, "logps/rejected": -221.0738983154297, "loss": 0.2983, "rewards/accuracies": 1.0, "rewards/chosen": 0.3992217183113098, "rewards/margins": 1.3036235570907593, "rewards/rejected": -0.9044018983840942, "step": 1128 }, { "epoch": 0.13, "learning_rate": 2.649537633149947e-07, "logits/chosen": -3.0053534507751465, "logits/rejected": -3.0161592960357666, "logps/chosen": -272.62664794921875, "logps/rejected": -172.41371154785156, "loss": 0.6409, "rewards/accuracies": 0.875, "rewards/chosen": -0.41247886419296265, "rewards/margins": 0.5148661732673645, "rewards/rejected": -0.9273449778556824, "step": 1129 }, { "epoch": 0.13, "learning_rate": 2.6491864684537046e-07, "logits/chosen": -2.398597002029419, "logits/rejected": -2.696943998336792, "logps/chosen": -188.15185546875, "logps/rejected": -132.21063232421875, "loss": 0.615, "rewards/accuracies": 0.5, "rewards/chosen": -0.060358427464962006, "rewards/margins": 0.21854951977729797, "rewards/rejected": -0.2789079546928406, "step": 1130 }, { "epoch": 0.13, "learning_rate": 2.648835303757462e-07, "logits/chosen": -3.0550076961517334, "logits/rejected": -2.8901286125183105, "logps/chosen": -392.510498046875, "logps/rejected": -505.06134033203125, "loss": 0.4503, "rewards/accuracies": 0.75, "rewards/chosen": -0.31051135063171387, "rewards/margins": 1.561906099319458, "rewards/rejected": -1.8724174499511719, "step": 1131 }, { "epoch": 0.13, "learning_rate": 2.6484841390612197e-07, "logits/chosen": -3.2196896076202393, "logits/rejected": -3.2791953086853027, "logps/chosen": -130.60301208496094, "logps/rejected": -175.105224609375, "loss": 0.6484, "rewards/accuracies": 0.5, "rewards/chosen": 0.07421918958425522, "rewards/margins": 0.8465479612350464, "rewards/rejected": -0.7723287343978882, "step": 1132 }, { "epoch": 0.13, "learning_rate": 2.648132974364977e-07, "logits/chosen": -3.818188190460205, "logits/rejected": -3.7487478256225586, "logps/chosen": -237.87509155273438, "logps/rejected": -215.31460571289062, "loss": 0.537, "rewards/accuracies": 0.75, "rewards/chosen": -0.11975985765457153, "rewards/margins": 0.8998790383338928, "rewards/rejected": -1.019639015197754, "step": 1133 }, { "epoch": 0.13, "learning_rate": 2.6477818096687343e-07, "logits/chosen": -2.8150267601013184, "logits/rejected": -2.9363014698028564, "logps/chosen": -335.4690246582031, "logps/rejected": -257.10845947265625, "loss": 0.506, "rewards/accuracies": 0.625, "rewards/chosen": -0.3079419434070587, "rewards/margins": 0.6567952632904053, "rewards/rejected": -0.9647372961044312, "step": 1134 }, { "epoch": 0.13, "learning_rate": 2.647430644972492e-07, "logits/chosen": -3.524766206741333, "logits/rejected": -3.2200090885162354, "logps/chosen": -422.7547912597656, "logps/rejected": -306.89923095703125, "loss": 0.4998, "rewards/accuracies": 0.75, "rewards/chosen": -0.01270398497581482, "rewards/margins": 0.7448458671569824, "rewards/rejected": -0.7575498819351196, "step": 1135 }, { "epoch": 0.13, "learning_rate": 2.6470794802762493e-07, "logits/chosen": -3.0551304817199707, "logits/rejected": -3.240947723388672, "logps/chosen": -324.19525146484375, "logps/rejected": -278.0753479003906, "loss": 0.2814, "rewards/accuracies": 0.875, "rewards/chosen": 0.37303459644317627, "rewards/margins": 1.9975358247756958, "rewards/rejected": -1.6245012283325195, "step": 1136 }, { "epoch": 0.13, "learning_rate": 2.646728315580007e-07, "logits/chosen": -3.1667635440826416, "logits/rejected": -2.9411778450012207, "logps/chosen": -164.29660034179688, "logps/rejected": -266.9853515625, "loss": 0.3283, "rewards/accuracies": 0.875, "rewards/chosen": -0.041577816009521484, "rewards/margins": 2.3180017471313477, "rewards/rejected": -2.359579563140869, "step": 1137 }, { "epoch": 0.13, "learning_rate": 2.6463771508837644e-07, "logits/chosen": -2.3710896968841553, "logits/rejected": -2.5383169651031494, "logps/chosen": -450.00567626953125, "logps/rejected": -319.1898193359375, "loss": 0.4053, "rewards/accuracies": 0.875, "rewards/chosen": 0.3168097734451294, "rewards/margins": 1.349205732345581, "rewards/rejected": -1.0323959589004517, "step": 1138 }, { "epoch": 0.13, "learning_rate": 2.646025986187522e-07, "logits/chosen": -3.3401284217834473, "logits/rejected": -3.5005054473876953, "logps/chosen": -66.05833435058594, "logps/rejected": -163.06019592285156, "loss": 0.5837, "rewards/accuracies": 0.5, "rewards/chosen": -0.03818170726299286, "rewards/margins": 0.8913490176200867, "rewards/rejected": -0.929530680179596, "step": 1139 }, { "epoch": 0.13, "learning_rate": 2.6456748214912795e-07, "logits/chosen": -3.113823652267456, "logits/rejected": -3.3339028358459473, "logps/chosen": -336.054931640625, "logps/rejected": -253.0380096435547, "loss": 0.3977, "rewards/accuracies": 0.75, "rewards/chosen": 0.2362760752439499, "rewards/margins": 1.52045738697052, "rewards/rejected": -1.2841812372207642, "step": 1140 }, { "epoch": 0.13, "learning_rate": 2.645323656795037e-07, "logits/chosen": -3.5555615425109863, "logits/rejected": -3.581885576248169, "logps/chosen": -404.1273498535156, "logps/rejected": -256.18072509765625, "loss": 0.3216, "rewards/accuracies": 0.875, "rewards/chosen": 0.08714514225721359, "rewards/margins": 1.3914008140563965, "rewards/rejected": -1.3042556047439575, "step": 1141 }, { "epoch": 0.13, "learning_rate": 2.644972492098794e-07, "logits/chosen": -3.1227328777313232, "logits/rejected": -3.675710678100586, "logps/chosen": -127.75099182128906, "logps/rejected": -276.4294128417969, "loss": 0.4435, "rewards/accuracies": 0.75, "rewards/chosen": 0.00015526264905929565, "rewards/margins": 1.424871802330017, "rewards/rejected": -1.4247164726257324, "step": 1142 }, { "epoch": 0.13, "learning_rate": 2.6446213274025516e-07, "logits/chosen": -2.8072571754455566, "logits/rejected": -2.714836359024048, "logps/chosen": -183.17105102539062, "logps/rejected": -163.0018310546875, "loss": 0.4076, "rewards/accuracies": 1.0, "rewards/chosen": 0.1967604011297226, "rewards/margins": 0.74781733751297, "rewards/rejected": -0.5510568618774414, "step": 1143 }, { "epoch": 0.13, "learning_rate": 2.644270162706309e-07, "logits/chosen": -3.483196258544922, "logits/rejected": -3.71005916595459, "logps/chosen": -211.03248596191406, "logps/rejected": -228.783203125, "loss": 0.2445, "rewards/accuracies": 0.875, "rewards/chosen": 0.005968952551484108, "rewards/margins": 2.059837818145752, "rewards/rejected": -2.0538690090179443, "step": 1144 }, { "epoch": 0.13, "learning_rate": 2.6439189980100667e-07, "logits/chosen": -3.1735541820526123, "logits/rejected": -3.2536001205444336, "logps/chosen": -313.92059326171875, "logps/rejected": -222.73507690429688, "loss": 0.5248, "rewards/accuracies": 0.75, "rewards/chosen": 0.028326570987701416, "rewards/margins": 1.348564624786377, "rewards/rejected": -1.3202379941940308, "step": 1145 }, { "epoch": 0.13, "learning_rate": 2.643567833313824e-07, "logits/chosen": -3.06083607673645, "logits/rejected": -3.1605420112609863, "logps/chosen": -458.60009765625, "logps/rejected": -479.2228088378906, "loss": 0.2074, "rewards/accuracies": 1.0, "rewards/chosen": 0.3232947885990143, "rewards/margins": 2.071357011795044, "rewards/rejected": -1.7480623722076416, "step": 1146 }, { "epoch": 0.13, "learning_rate": 2.643216668617581e-07, "logits/chosen": -2.5362708568573, "logits/rejected": -2.726102352142334, "logps/chosen": -332.25128173828125, "logps/rejected": -321.62591552734375, "loss": 0.3376, "rewards/accuracies": 0.75, "rewards/chosen": 0.05548195540904999, "rewards/margins": 1.3431968688964844, "rewards/rejected": -1.287714958190918, "step": 1147 }, { "epoch": 0.13, "learning_rate": 2.642865503921339e-07, "logits/chosen": -3.7037458419799805, "logits/rejected": -3.8020386695861816, "logps/chosen": -265.3013916015625, "logps/rejected": -235.034423828125, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": -0.32024362683296204, "rewards/margins": 0.7788153290748596, "rewards/rejected": -1.0990588665008545, "step": 1148 }, { "epoch": 0.13, "learning_rate": 2.642514339225097e-07, "logits/chosen": -2.742373466491699, "logits/rejected": -2.7074027061462402, "logps/chosen": -208.57275390625, "logps/rejected": -198.40682983398438, "loss": 0.4931, "rewards/accuracies": 0.625, "rewards/chosen": 0.12414845824241638, "rewards/margins": 0.7232298254966736, "rewards/rejected": -0.5990813374519348, "step": 1149 }, { "epoch": 0.13, "learning_rate": 2.642163174528854e-07, "logits/chosen": -3.285632610321045, "logits/rejected": -3.0850794315338135, "logps/chosen": -183.4230194091797, "logps/rejected": -170.808837890625, "loss": 0.6424, "rewards/accuracies": 0.625, "rewards/chosen": 0.1261528879404068, "rewards/margins": 0.5317463278770447, "rewards/rejected": -0.4055934250354767, "step": 1150 }, { "epoch": 0.13, "learning_rate": 2.6418120098326114e-07, "logits/chosen": -2.9554035663604736, "logits/rejected": -2.7266845703125, "logps/chosen": -163.44700622558594, "logps/rejected": -205.77630615234375, "loss": 0.4633, "rewards/accuracies": 0.75, "rewards/chosen": 0.044466301798820496, "rewards/margins": 0.8958410620689392, "rewards/rejected": -0.8513747453689575, "step": 1151 }, { "epoch": 0.13, "learning_rate": 2.641460845136369e-07, "logits/chosen": -3.2066683769226074, "logits/rejected": -2.8116397857666016, "logps/chosen": -165.21005249023438, "logps/rejected": -190.33555603027344, "loss": 0.3289, "rewards/accuracies": 0.875, "rewards/chosen": 0.28757211565971375, "rewards/margins": 1.0682761669158936, "rewards/rejected": -0.780704140663147, "step": 1152 }, { "epoch": 0.13, "learning_rate": 2.6411096804401264e-07, "logits/chosen": -3.3010871410369873, "logits/rejected": -3.458136558532715, "logps/chosen": -199.72857666015625, "logps/rejected": -227.2401123046875, "loss": 0.4695, "rewards/accuracies": 1.0, "rewards/chosen": -0.2094159573316574, "rewards/margins": 0.8545058965682983, "rewards/rejected": -1.0639218091964722, "step": 1153 }, { "epoch": 0.13, "learning_rate": 2.640758515743884e-07, "logits/chosen": -3.487640142440796, "logits/rejected": -3.188237428665161, "logps/chosen": -269.1490478515625, "logps/rejected": -241.99160766601562, "loss": 0.6235, "rewards/accuracies": 0.75, "rewards/chosen": 0.23445971310138702, "rewards/margins": 0.6877169609069824, "rewards/rejected": -0.4532572031021118, "step": 1154 }, { "epoch": 0.13, "learning_rate": 2.640407351047641e-07, "logits/chosen": -2.62355637550354, "logits/rejected": -2.727726459503174, "logps/chosen": -291.8756103515625, "logps/rejected": -201.43405151367188, "loss": 0.2658, "rewards/accuracies": 1.0, "rewards/chosen": 0.4572782516479492, "rewards/margins": 1.4590744972229004, "rewards/rejected": -1.0017961263656616, "step": 1155 }, { "epoch": 0.13, "learning_rate": 2.6400561863513985e-07, "logits/chosen": -2.646324396133423, "logits/rejected": -2.8772103786468506, "logps/chosen": -131.6094512939453, "logps/rejected": -212.9236297607422, "loss": 0.3245, "rewards/accuracies": 1.0, "rewards/chosen": -0.2445332407951355, "rewards/margins": 1.5212008953094482, "rewards/rejected": -1.765734076499939, "step": 1156 }, { "epoch": 0.13, "learning_rate": 2.639705021655156e-07, "logits/chosen": -3.822502613067627, "logits/rejected": -3.9203057289123535, "logps/chosen": -281.3533630371094, "logps/rejected": -298.17938232421875, "loss": 0.2597, "rewards/accuracies": 1.0, "rewards/chosen": -0.14304950833320618, "rewards/margins": 1.909485936164856, "rewards/rejected": -2.05253529548645, "step": 1157 }, { "epoch": 0.13, "learning_rate": 2.6393538569589136e-07, "logits/chosen": -3.2148208618164062, "logits/rejected": -3.26446533203125, "logps/chosen": -431.0948181152344, "logps/rejected": -238.06103515625, "loss": 0.3958, "rewards/accuracies": 0.875, "rewards/chosen": -0.2417408525943756, "rewards/margins": 1.1710360050201416, "rewards/rejected": -1.4127769470214844, "step": 1158 }, { "epoch": 0.13, "learning_rate": 2.639002692262671e-07, "logits/chosen": -3.327244281768799, "logits/rejected": -3.8737828731536865, "logps/chosen": -216.22923278808594, "logps/rejected": -252.5621337890625, "loss": 0.4256, "rewards/accuracies": 0.875, "rewards/chosen": 0.5626663565635681, "rewards/margins": 1.4025487899780273, "rewards/rejected": -0.839882493019104, "step": 1159 }, { "epoch": 0.13, "learning_rate": 2.638651527566428e-07, "logits/chosen": -2.4803550243377686, "logits/rejected": -2.5593791007995605, "logps/chosen": -202.14480590820312, "logps/rejected": -303.0001220703125, "loss": 0.5275, "rewards/accuracies": 0.625, "rewards/chosen": -0.03640022873878479, "rewards/margins": 1.2050468921661377, "rewards/rejected": -1.2414470911026, "step": 1160 }, { "epoch": 0.13, "learning_rate": 2.638300362870186e-07, "logits/chosen": -2.804436683654785, "logits/rejected": -2.898144483566284, "logps/chosen": -365.42987060546875, "logps/rejected": -222.9817657470703, "loss": 0.9608, "rewards/accuracies": 0.625, "rewards/chosen": -0.6515011787414551, "rewards/margins": 0.8350887298583984, "rewards/rejected": -1.486589789390564, "step": 1161 }, { "epoch": 0.13, "learning_rate": 2.637949198173944e-07, "logits/chosen": -2.5278637409210205, "logits/rejected": -2.7136406898498535, "logps/chosen": -274.7254638671875, "logps/rejected": -264.2034606933594, "loss": 0.5115, "rewards/accuracies": 0.5, "rewards/chosen": -0.3426119387149811, "rewards/margins": 0.9534878730773926, "rewards/rejected": -1.2960999011993408, "step": 1162 }, { "epoch": 0.13, "learning_rate": 2.637598033477701e-07, "logits/chosen": -3.1660380363464355, "logits/rejected": -3.440300464630127, "logps/chosen": -152.2317352294922, "logps/rejected": -265.6800537109375, "loss": 0.4085, "rewards/accuracies": 0.875, "rewards/chosen": 0.12988163530826569, "rewards/margins": 2.6952085494995117, "rewards/rejected": -2.565326690673828, "step": 1163 }, { "epoch": 0.13, "learning_rate": 2.6372468687814583e-07, "logits/chosen": -3.953016757965088, "logits/rejected": -4.006649017333984, "logps/chosen": -146.94737243652344, "logps/rejected": -227.32015991210938, "loss": 0.3026, "rewards/accuracies": 0.875, "rewards/chosen": -0.07558516412973404, "rewards/margins": 1.61501944065094, "rewards/rejected": -1.690604567527771, "step": 1164 }, { "epoch": 0.13, "learning_rate": 2.636895704085216e-07, "logits/chosen": -3.59645938873291, "logits/rejected": -3.3608999252319336, "logps/chosen": -252.2806854248047, "logps/rejected": -200.71478271484375, "loss": 0.3504, "rewards/accuracies": 0.875, "rewards/chosen": 0.029424652457237244, "rewards/margins": 1.357444167137146, "rewards/rejected": -1.3280194997787476, "step": 1165 }, { "epoch": 0.13, "learning_rate": 2.6365445393889734e-07, "logits/chosen": -3.1203033924102783, "logits/rejected": -2.9929466247558594, "logps/chosen": -206.53280639648438, "logps/rejected": -180.3179168701172, "loss": 0.3933, "rewards/accuracies": 0.875, "rewards/chosen": -0.036619167774915695, "rewards/margins": 0.8788002729415894, "rewards/rejected": -0.9154193997383118, "step": 1166 }, { "epoch": 0.13, "learning_rate": 2.636193374692731e-07, "logits/chosen": -3.5457210540771484, "logits/rejected": -3.2167935371398926, "logps/chosen": -252.66542053222656, "logps/rejected": -160.60671997070312, "loss": 0.4133, "rewards/accuracies": 0.875, "rewards/chosen": 0.2558909058570862, "rewards/margins": 1.053233027458191, "rewards/rejected": -0.7973421216011047, "step": 1167 }, { "epoch": 0.13, "learning_rate": 2.635842209996488e-07, "logits/chosen": -3.5444822311401367, "logits/rejected": -3.1382288932800293, "logps/chosen": -464.6197509765625, "logps/rejected": -265.07501220703125, "loss": 0.3242, "rewards/accuracies": 0.875, "rewards/chosen": -0.2711925804615021, "rewards/margins": 1.4047973155975342, "rewards/rejected": -1.6759898662567139, "step": 1168 }, { "epoch": 0.13, "learning_rate": 2.6354910453002455e-07, "logits/chosen": -2.88092041015625, "logits/rejected": -2.92826509475708, "logps/chosen": -300.991943359375, "logps/rejected": -177.91783142089844, "loss": 0.4204, "rewards/accuracies": 0.875, "rewards/chosen": 0.45847827196121216, "rewards/margins": 0.976760983467102, "rewards/rejected": -0.5182826519012451, "step": 1169 }, { "epoch": 0.13, "learning_rate": 2.6351398806040035e-07, "logits/chosen": -3.030751943588257, "logits/rejected": -2.9094910621643066, "logps/chosen": -257.4676513671875, "logps/rejected": -175.7664031982422, "loss": 0.4556, "rewards/accuracies": 0.875, "rewards/chosen": -0.022378571331501007, "rewards/margins": 0.94871985912323, "rewards/rejected": -0.9710984230041504, "step": 1170 }, { "epoch": 0.13, "learning_rate": 2.6347887159077605e-07, "logits/chosen": -3.7276697158813477, "logits/rejected": -3.109769344329834, "logps/chosen": -336.2955322265625, "logps/rejected": -313.4561767578125, "loss": 0.5864, "rewards/accuracies": 0.75, "rewards/chosen": -0.1274830847978592, "rewards/margins": 0.44493913650512695, "rewards/rejected": -0.5724222660064697, "step": 1171 }, { "epoch": 0.14, "learning_rate": 2.634437551211518e-07, "logits/chosen": -3.2787599563598633, "logits/rejected": -2.883723497390747, "logps/chosen": -323.1746826171875, "logps/rejected": -269.3150634765625, "loss": 0.4465, "rewards/accuracies": 0.75, "rewards/chosen": 0.26428043842315674, "rewards/margins": 1.0590006113052368, "rewards/rejected": -0.7947201728820801, "step": 1172 }, { "epoch": 0.14, "learning_rate": 2.6340863865152756e-07, "logits/chosen": -3.4941961765289307, "logits/rejected": -3.412862777709961, "logps/chosen": -101.77346801757812, "logps/rejected": -161.71405029296875, "loss": 0.486, "rewards/accuracies": 0.75, "rewards/chosen": -0.1525445580482483, "rewards/margins": 1.894465446472168, "rewards/rejected": -2.0470099449157715, "step": 1173 }, { "epoch": 0.14, "learning_rate": 2.633735221819033e-07, "logits/chosen": -3.2377138137817383, "logits/rejected": -3.7454915046691895, "logps/chosen": -191.99063110351562, "logps/rejected": -236.19329833984375, "loss": 0.3064, "rewards/accuracies": 0.875, "rewards/chosen": -0.226438969373703, "rewards/margins": 1.5194885730743408, "rewards/rejected": -1.7459276914596558, "step": 1174 }, { "epoch": 0.14, "learning_rate": 2.6333840571227907e-07, "logits/chosen": -3.5576388835906982, "logits/rejected": -3.320265293121338, "logps/chosen": -347.0915832519531, "logps/rejected": -266.872802734375, "loss": 0.5172, "rewards/accuracies": 0.75, "rewards/chosen": -0.529593288898468, "rewards/margins": 0.8514747023582458, "rewards/rejected": -1.3810679912567139, "step": 1175 }, { "epoch": 0.14, "learning_rate": 2.6330328924265477e-07, "logits/chosen": -3.101223945617676, "logits/rejected": -3.2820241451263428, "logps/chosen": -386.18341064453125, "logps/rejected": -237.38047790527344, "loss": 0.4948, "rewards/accuracies": 0.625, "rewards/chosen": 0.23741579055786133, "rewards/margins": 0.8363604545593262, "rewards/rejected": -0.5989446640014648, "step": 1176 }, { "epoch": 0.14, "learning_rate": 2.632681727730305e-07, "logits/chosen": -3.100520372390747, "logits/rejected": -3.142585277557373, "logps/chosen": -249.1744842529297, "logps/rejected": -230.4940643310547, "loss": 0.4263, "rewards/accuracies": 0.75, "rewards/chosen": -0.1742902398109436, "rewards/margins": 1.793858528137207, "rewards/rejected": -1.9681488275527954, "step": 1177 }, { "epoch": 0.14, "learning_rate": 2.632330563034063e-07, "logits/chosen": -3.573251962661743, "logits/rejected": -3.4033362865448, "logps/chosen": -87.0963363647461, "logps/rejected": -143.77487182617188, "loss": 0.5074, "rewards/accuracies": 0.625, "rewards/chosen": 0.14743776619434357, "rewards/margins": 0.6411831378936768, "rewards/rejected": -0.4937452971935272, "step": 1178 }, { "epoch": 0.14, "learning_rate": 2.6319793983378203e-07, "logits/chosen": -2.7285749912261963, "logits/rejected": -2.8543498516082764, "logps/chosen": -259.2212829589844, "logps/rejected": -320.10748291015625, "loss": 0.34, "rewards/accuracies": 0.875, "rewards/chosen": -0.06968726217746735, "rewards/margins": 1.613879680633545, "rewards/rejected": -1.6835670471191406, "step": 1179 }, { "epoch": 0.14, "learning_rate": 2.631628233641578e-07, "logits/chosen": -2.950324535369873, "logits/rejected": -3.041490077972412, "logps/chosen": -315.1473083496094, "logps/rejected": -242.81930541992188, "loss": 0.5288, "rewards/accuracies": 0.75, "rewards/chosen": 0.1525687724351883, "rewards/margins": 0.6952160000801086, "rewards/rejected": -0.5426473021507263, "step": 1180 }, { "epoch": 0.14, "learning_rate": 2.631277068945335e-07, "logits/chosen": -3.0589451789855957, "logits/rejected": -2.7982590198516846, "logps/chosen": -448.7673034667969, "logps/rejected": -301.37115478515625, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": 1.030834436416626, "rewards/margins": 2.1853466033935547, "rewards/rejected": -1.1545119285583496, "step": 1181 }, { "epoch": 0.14, "learning_rate": 2.6309259042490924e-07, "logits/chosen": -2.3934059143066406, "logits/rejected": -2.337977647781372, "logps/chosen": -215.0846405029297, "logps/rejected": -180.48843383789062, "loss": 0.4268, "rewards/accuracies": 1.0, "rewards/chosen": 0.07917890697717667, "rewards/margins": 0.8075821995735168, "rewards/rejected": -0.7284033298492432, "step": 1182 }, { "epoch": 0.14, "learning_rate": 2.6305747395528505e-07, "logits/chosen": -2.7609660625457764, "logits/rejected": -2.9934773445129395, "logps/chosen": -311.5391540527344, "logps/rejected": -224.38735961914062, "loss": 0.4512, "rewards/accuracies": 0.625, "rewards/chosen": 0.37749576568603516, "rewards/margins": 0.8909459114074707, "rewards/rejected": -0.5134501457214355, "step": 1183 }, { "epoch": 0.14, "learning_rate": 2.6302235748566075e-07, "logits/chosen": -3.3895435333251953, "logits/rejected": -3.5497915744781494, "logps/chosen": -128.94158935546875, "logps/rejected": -125.99014282226562, "loss": 0.5472, "rewards/accuracies": 0.5, "rewards/chosen": 0.2376810610294342, "rewards/margins": 0.65836501121521, "rewards/rejected": -0.42068392038345337, "step": 1184 }, { "epoch": 0.14, "learning_rate": 2.629872410160365e-07, "logits/chosen": -2.526494026184082, "logits/rejected": -2.765414237976074, "logps/chosen": -207.33148193359375, "logps/rejected": -300.87548828125, "loss": 0.4863, "rewards/accuracies": 0.75, "rewards/chosen": -0.19567319750785828, "rewards/margins": 1.3545764684677124, "rewards/rejected": -1.5502498149871826, "step": 1185 }, { "epoch": 0.14, "learning_rate": 2.6295212454641226e-07, "logits/chosen": -2.939251661300659, "logits/rejected": -2.755117177963257, "logps/chosen": -183.38392639160156, "logps/rejected": -202.21408081054688, "loss": 0.3868, "rewards/accuracies": 1.0, "rewards/chosen": 0.009866468608379364, "rewards/margins": 1.3239699602127075, "rewards/rejected": -1.3141034841537476, "step": 1186 }, { "epoch": 0.14, "learning_rate": 2.62917008076788e-07, "logits/chosen": -3.3001906871795654, "logits/rejected": -3.465632915496826, "logps/chosen": -287.233642578125, "logps/rejected": -273.9935302734375, "loss": 0.3337, "rewards/accuracies": 0.875, "rewards/chosen": -0.3053727447986603, "rewards/margins": 1.4368269443511963, "rewards/rejected": -1.7421997785568237, "step": 1187 }, { "epoch": 0.14, "learning_rate": 2.6288189160716376e-07, "logits/chosen": -3.176413059234619, "logits/rejected": -3.1112475395202637, "logps/chosen": -185.18736267089844, "logps/rejected": -236.84011840820312, "loss": 0.2548, "rewards/accuracies": 1.0, "rewards/chosen": 0.02018701285123825, "rewards/margins": 1.5580048561096191, "rewards/rejected": -1.5378179550170898, "step": 1188 }, { "epoch": 0.14, "learning_rate": 2.6284677513753947e-07, "logits/chosen": -3.434058666229248, "logits/rejected": -3.3798446655273438, "logps/chosen": -280.39312744140625, "logps/rejected": -304.622802734375, "loss": 0.5577, "rewards/accuracies": 0.75, "rewards/chosen": -0.39674603939056396, "rewards/margins": 0.48376649618148804, "rewards/rejected": -0.880512535572052, "step": 1189 }, { "epoch": 0.14, "learning_rate": 2.628116586679152e-07, "logits/chosen": -3.294607639312744, "logits/rejected": -3.136362075805664, "logps/chosen": -232.37252807617188, "logps/rejected": -138.50704956054688, "loss": 0.6187, "rewards/accuracies": 0.5, "rewards/chosen": -0.0002674981951713562, "rewards/margins": 0.31020474433898926, "rewards/rejected": -0.3104722499847412, "step": 1190 }, { "epoch": 0.14, "learning_rate": 2.6277654219829097e-07, "logits/chosen": -3.156045436859131, "logits/rejected": -3.253831624984741, "logps/chosen": -296.0826416015625, "logps/rejected": -253.27841186523438, "loss": 0.1435, "rewards/accuracies": 1.0, "rewards/chosen": 0.5889577865600586, "rewards/margins": 2.6336612701416016, "rewards/rejected": -2.044703245162964, "step": 1191 }, { "epoch": 0.14, "learning_rate": 2.6274142572866673e-07, "logits/chosen": -2.7620837688446045, "logits/rejected": -2.728872060775757, "logps/chosen": -103.6409912109375, "logps/rejected": -97.89027404785156, "loss": 0.7472, "rewards/accuracies": 0.625, "rewards/chosen": -0.25599542260169983, "rewards/margins": 0.030623771250247955, "rewards/rejected": -0.2866191864013672, "step": 1192 }, { "epoch": 0.14, "learning_rate": 2.627063092590425e-07, "logits/chosen": -3.0368871688842773, "logits/rejected": -2.8645498752593994, "logps/chosen": -206.26072692871094, "logps/rejected": -187.91006469726562, "loss": 0.5278, "rewards/accuracies": 0.75, "rewards/chosen": -0.17441660165786743, "rewards/margins": 0.44798439741134644, "rewards/rejected": -0.6224009990692139, "step": 1193 }, { "epoch": 0.14, "learning_rate": 2.6267119278941823e-07, "logits/chosen": -2.9795985221862793, "logits/rejected": -3.218921661376953, "logps/chosen": -410.1129455566406, "logps/rejected": -461.47027587890625, "loss": 0.2911, "rewards/accuracies": 1.0, "rewards/chosen": 0.5375205874443054, "rewards/margins": 1.5954796075820923, "rewards/rejected": -1.0579591989517212, "step": 1194 }, { "epoch": 0.14, "learning_rate": 2.62636076319794e-07, "logits/chosen": -3.753042697906494, "logits/rejected": -3.6333470344543457, "logps/chosen": -187.22178649902344, "logps/rejected": -187.4674530029297, "loss": 0.5789, "rewards/accuracies": 0.875, "rewards/chosen": -0.57789146900177, "rewards/margins": 0.8073467016220093, "rewards/rejected": -1.3852381706237793, "step": 1195 }, { "epoch": 0.14, "learning_rate": 2.6260095985016974e-07, "logits/chosen": -2.9603824615478516, "logits/rejected": -3.2064013481140137, "logps/chosen": -277.5440673828125, "logps/rejected": -266.9850769042969, "loss": 0.7151, "rewards/accuracies": 0.625, "rewards/chosen": -0.5664582848548889, "rewards/margins": 0.5947904586791992, "rewards/rejected": -1.1612486839294434, "step": 1196 }, { "epoch": 0.14, "learning_rate": 2.6256584338054544e-07, "logits/chosen": -3.846341848373413, "logits/rejected": -3.5898423194885254, "logps/chosen": -194.23770141601562, "logps/rejected": -304.7276611328125, "loss": 0.4886, "rewards/accuracies": 0.875, "rewards/chosen": -0.453357994556427, "rewards/margins": 0.8248500227928162, "rewards/rejected": -1.2782080173492432, "step": 1197 }, { "epoch": 0.14, "learning_rate": 2.625307269109212e-07, "logits/chosen": -3.616652488708496, "logits/rejected": -3.5786354541778564, "logps/chosen": -305.54388427734375, "logps/rejected": -181.3466339111328, "loss": 0.4595, "rewards/accuracies": 0.75, "rewards/chosen": 0.08782371878623962, "rewards/margins": 1.290541410446167, "rewards/rejected": -1.202717661857605, "step": 1198 }, { "epoch": 0.14, "learning_rate": 2.6249561044129695e-07, "logits/chosen": -3.1046082973480225, "logits/rejected": -3.1202735900878906, "logps/chosen": -167.1373291015625, "logps/rejected": -262.22149658203125, "loss": 0.7328, "rewards/accuracies": 0.75, "rewards/chosen": -0.23059387505054474, "rewards/margins": 0.7052990198135376, "rewards/rejected": -0.9358928799629211, "step": 1199 }, { "epoch": 0.14, "learning_rate": 2.624604939716727e-07, "logits/chosen": -3.6484487056732178, "logits/rejected": -3.410665988922119, "logps/chosen": -196.5653076171875, "logps/rejected": -286.01800537109375, "loss": 0.3786, "rewards/accuracies": 0.75, "rewards/chosen": -0.17695683240890503, "rewards/margins": 2.156592845916748, "rewards/rejected": -2.333549976348877, "step": 1200 }, { "epoch": 0.14, "learning_rate": 2.6242537750204846e-07, "logits/chosen": -3.497046947479248, "logits/rejected": -3.4260759353637695, "logps/chosen": -103.20014953613281, "logps/rejected": -167.691162109375, "loss": 0.4766, "rewards/accuracies": 0.875, "rewards/chosen": -0.020122356712818146, "rewards/margins": 0.8744657635688782, "rewards/rejected": -0.8945881128311157, "step": 1201 }, { "epoch": 0.14, "learning_rate": 2.623902610324242e-07, "logits/chosen": -3.442551612854004, "logits/rejected": -3.2935521602630615, "logps/chosen": -403.45306396484375, "logps/rejected": -298.0394287109375, "loss": 0.8709, "rewards/accuracies": 0.5, "rewards/chosen": -0.41956818103790283, "rewards/margins": 0.3612107038497925, "rewards/rejected": -0.7807788848876953, "step": 1202 }, { "epoch": 0.14, "learning_rate": 2.623551445627999e-07, "logits/chosen": -3.6135380268096924, "logits/rejected": -3.618638038635254, "logps/chosen": -296.8923645019531, "logps/rejected": -347.677001953125, "loss": 0.3003, "rewards/accuracies": 1.0, "rewards/chosen": 0.4914509057998657, "rewards/margins": 1.7996132373809814, "rewards/rejected": -1.3081622123718262, "step": 1203 }, { "epoch": 0.14, "learning_rate": 2.623200280931757e-07, "logits/chosen": -2.6543173789978027, "logits/rejected": -2.757397413253784, "logps/chosen": -208.95167541503906, "logps/rejected": -248.06454467773438, "loss": 0.2899, "rewards/accuracies": 0.875, "rewards/chosen": -0.2177644670009613, "rewards/margins": 1.8779759407043457, "rewards/rejected": -2.09574031829834, "step": 1204 }, { "epoch": 0.14, "learning_rate": 2.622849116235514e-07, "logits/chosen": -3.0110507011413574, "logits/rejected": -2.8109912872314453, "logps/chosen": -454.27880859375, "logps/rejected": -273.0755920410156, "loss": 0.3737, "rewards/accuracies": 0.75, "rewards/chosen": 0.04671168327331543, "rewards/margins": 1.133287787437439, "rewards/rejected": -1.0865761041641235, "step": 1205 }, { "epoch": 0.14, "learning_rate": 2.622497951539272e-07, "logits/chosen": -2.9705848693847656, "logits/rejected": -3.4963574409484863, "logps/chosen": -184.06619262695312, "logps/rejected": -175.2967529296875, "loss": 0.6394, "rewards/accuracies": 0.625, "rewards/chosen": -0.29350727796554565, "rewards/margins": 1.7692946195602417, "rewards/rejected": -2.0628018379211426, "step": 1206 }, { "epoch": 0.14, "learning_rate": 2.6221467868430293e-07, "logits/chosen": -3.502842426300049, "logits/rejected": -3.630740165710449, "logps/chosen": -295.95281982421875, "logps/rejected": -211.37509155273438, "loss": 0.4251, "rewards/accuracies": 0.875, "rewards/chosen": 0.12740890681743622, "rewards/margins": 0.9196792840957642, "rewards/rejected": -0.7922704219818115, "step": 1207 }, { "epoch": 0.14, "learning_rate": 2.621795622146787e-07, "logits/chosen": -3.059675455093384, "logits/rejected": -3.207529067993164, "logps/chosen": -296.3916015625, "logps/rejected": -233.34600830078125, "loss": 0.3382, "rewards/accuracies": 0.875, "rewards/chosen": 0.5595195889472961, "rewards/margins": 1.4842607975006104, "rewards/rejected": -0.9247411489486694, "step": 1208 }, { "epoch": 0.14, "learning_rate": 2.6214444574505444e-07, "logits/chosen": -3.7902140617370605, "logits/rejected": -3.2357122898101807, "logps/chosen": -267.97381591796875, "logps/rejected": -255.15994262695312, "loss": 0.8247, "rewards/accuracies": 0.5, "rewards/chosen": -1.2194626331329346, "rewards/margins": -0.16171997785568237, "rewards/rejected": -1.057742714881897, "step": 1209 }, { "epoch": 0.14, "learning_rate": 2.621093292754302e-07, "logits/chosen": -3.1220099925994873, "logits/rejected": -3.432051181793213, "logps/chosen": -213.610107421875, "logps/rejected": -271.35150146484375, "loss": 0.7012, "rewards/accuracies": 0.5, "rewards/chosen": -0.20678411424160004, "rewards/margins": 1.4082266092300415, "rewards/rejected": -1.6150107383728027, "step": 1210 }, { "epoch": 0.14, "learning_rate": 2.620742128058059e-07, "logits/chosen": -3.6795268058776855, "logits/rejected": -3.4376368522644043, "logps/chosen": -392.11285400390625, "logps/rejected": -221.34605407714844, "loss": 0.5037, "rewards/accuracies": 0.75, "rewards/chosen": -0.05437291041016579, "rewards/margins": 0.8223720788955688, "rewards/rejected": -0.8767449259757996, "step": 1211 }, { "epoch": 0.14, "learning_rate": 2.6203909633618164e-07, "logits/chosen": -3.2899718284606934, "logits/rejected": -3.053548574447632, "logps/chosen": -211.36944580078125, "logps/rejected": -204.5604705810547, "loss": 0.4386, "rewards/accuracies": 0.625, "rewards/chosen": -0.1047859638929367, "rewards/margins": 1.0950453281402588, "rewards/rejected": -1.199831247329712, "step": 1212 }, { "epoch": 0.14, "learning_rate": 2.620039798665574e-07, "logits/chosen": -2.5597352981567383, "logits/rejected": -2.5827295780181885, "logps/chosen": -89.36774444580078, "logps/rejected": -145.798095703125, "loss": 0.4773, "rewards/accuracies": 0.75, "rewards/chosen": -0.14074450731277466, "rewards/margins": 1.1211738586425781, "rewards/rejected": -1.261918306350708, "step": 1213 }, { "epoch": 0.14, "learning_rate": 2.6196886339693315e-07, "logits/chosen": -2.7932703495025635, "logits/rejected": -2.922492504119873, "logps/chosen": -181.97044372558594, "logps/rejected": -191.1028594970703, "loss": 0.3136, "rewards/accuracies": 0.875, "rewards/chosen": -0.01868841052055359, "rewards/margins": 1.501734972000122, "rewards/rejected": -1.520423412322998, "step": 1214 }, { "epoch": 0.14, "learning_rate": 2.619337469273089e-07, "logits/chosen": -2.409871816635132, "logits/rejected": -2.48637056350708, "logps/chosen": -335.9388732910156, "logps/rejected": -239.02120971679688, "loss": 0.5348, "rewards/accuracies": 0.625, "rewards/chosen": -0.17619505524635315, "rewards/margins": 1.254032850265503, "rewards/rejected": -1.4302278757095337, "step": 1215 }, { "epoch": 0.14, "learning_rate": 2.618986304576846e-07, "logits/chosen": -2.8044636249542236, "logits/rejected": -3.308803081512451, "logps/chosen": -207.7530059814453, "logps/rejected": -290.6954040527344, "loss": 0.1834, "rewards/accuracies": 0.875, "rewards/chosen": 0.40450558066368103, "rewards/margins": 3.2002768516540527, "rewards/rejected": -2.7957711219787598, "step": 1216 }, { "epoch": 0.14, "learning_rate": 2.618635139880604e-07, "logits/chosen": -2.6024394035339355, "logits/rejected": -2.6902215480804443, "logps/chosen": -385.6015625, "logps/rejected": -362.0187072753906, "loss": 0.2161, "rewards/accuracies": 1.0, "rewards/chosen": -0.27261805534362793, "rewards/margins": 1.67147958278656, "rewards/rejected": -1.944097638130188, "step": 1217 }, { "epoch": 0.14, "learning_rate": 2.6182839751843617e-07, "logits/chosen": -2.7859675884246826, "logits/rejected": -2.707383871078491, "logps/chosen": -257.12530517578125, "logps/rejected": -177.07656860351562, "loss": 0.4865, "rewards/accuracies": 0.75, "rewards/chosen": -0.47198402881622314, "rewards/margins": 0.918461799621582, "rewards/rejected": -1.3904458284378052, "step": 1218 }, { "epoch": 0.14, "learning_rate": 2.6179328104881187e-07, "logits/chosen": -3.0397071838378906, "logits/rejected": -2.8149397373199463, "logps/chosen": -207.83535766601562, "logps/rejected": -202.79832458496094, "loss": 0.4612, "rewards/accuracies": 0.625, "rewards/chosen": 0.6916698217391968, "rewards/margins": 0.7458405494689941, "rewards/rejected": -0.054170697927474976, "step": 1219 }, { "epoch": 0.14, "learning_rate": 2.617581645791876e-07, "logits/chosen": -3.9591684341430664, "logits/rejected": -3.9738426208496094, "logps/chosen": -276.22320556640625, "logps/rejected": -251.22706604003906, "loss": 0.3134, "rewards/accuracies": 0.875, "rewards/chosen": -0.03408125787973404, "rewards/margins": 1.5596749782562256, "rewards/rejected": -1.5937564373016357, "step": 1220 }, { "epoch": 0.14, "learning_rate": 2.617230481095634e-07, "logits/chosen": -2.9041268825531006, "logits/rejected": -2.7624189853668213, "logps/chosen": -316.84149169921875, "logps/rejected": -446.8443603515625, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": 0.21041324734687805, "rewards/margins": 2.1312544345855713, "rewards/rejected": -1.9208412170410156, "step": 1221 }, { "epoch": 0.14, "learning_rate": 2.6168793163993913e-07, "logits/chosen": -3.041705369949341, "logits/rejected": -2.8295531272888184, "logps/chosen": -207.0461883544922, "logps/rejected": -236.07821655273438, "loss": 0.3421, "rewards/accuracies": 0.875, "rewards/chosen": 0.1854623556137085, "rewards/margins": 1.3783029317855835, "rewards/rejected": -1.192840576171875, "step": 1222 }, { "epoch": 0.14, "learning_rate": 2.616528151703149e-07, "logits/chosen": -2.881880044937134, "logits/rejected": -3.0564446449279785, "logps/chosen": -298.6236877441406, "logps/rejected": -205.32493591308594, "loss": 0.4108, "rewards/accuracies": 0.75, "rewards/chosen": -0.20007996261119843, "rewards/margins": 2.0181522369384766, "rewards/rejected": -2.2182321548461914, "step": 1223 }, { "epoch": 0.14, "learning_rate": 2.616176987006906e-07, "logits/chosen": -3.3401763439178467, "logits/rejected": -3.094081401824951, "logps/chosen": -264.14288330078125, "logps/rejected": -306.1895751953125, "loss": 0.3563, "rewards/accuracies": 0.875, "rewards/chosen": 0.1618640422821045, "rewards/margins": 1.0272811651229858, "rewards/rejected": -0.8654171824455261, "step": 1224 }, { "epoch": 0.14, "learning_rate": 2.6158258223106634e-07, "logits/chosen": -3.0743939876556396, "logits/rejected": -2.9931015968322754, "logps/chosen": -329.1996154785156, "logps/rejected": -326.04376220703125, "loss": 0.2952, "rewards/accuracies": 1.0, "rewards/chosen": -0.05222554877400398, "rewards/margins": 1.5065926313400269, "rewards/rejected": -1.5588182210922241, "step": 1225 }, { "epoch": 0.14, "learning_rate": 2.6154746576144215e-07, "logits/chosen": -3.040006637573242, "logits/rejected": -3.3644235134124756, "logps/chosen": -195.9557342529297, "logps/rejected": -341.0067443847656, "loss": 0.4675, "rewards/accuracies": 0.75, "rewards/chosen": -0.021116290241479874, "rewards/margins": 1.4113513231277466, "rewards/rejected": -1.4324675798416138, "step": 1226 }, { "epoch": 0.14, "learning_rate": 2.6151234929181785e-07, "logits/chosen": -2.9008522033691406, "logits/rejected": -2.878602981567383, "logps/chosen": -239.6218719482422, "logps/rejected": -266.8584289550781, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": 0.0775146633386612, "rewards/margins": 1.4534400701522827, "rewards/rejected": -1.3759254217147827, "step": 1227 }, { "epoch": 0.14, "learning_rate": 2.614772328221936e-07, "logits/chosen": -2.6953284740448, "logits/rejected": -2.8165056705474854, "logps/chosen": -199.06704711914062, "logps/rejected": -193.72781372070312, "loss": 0.5636, "rewards/accuracies": 0.875, "rewards/chosen": 0.062103547155857086, "rewards/margins": 0.5349035263061523, "rewards/rejected": -0.47279998660087585, "step": 1228 }, { "epoch": 0.14, "learning_rate": 2.6144211635256935e-07, "logits/chosen": -3.118995428085327, "logits/rejected": -3.1838812828063965, "logps/chosen": -196.59376525878906, "logps/rejected": -340.3057556152344, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": -0.27144718170166016, "rewards/margins": 1.4590797424316406, "rewards/rejected": -1.7305269241333008, "step": 1229 }, { "epoch": 0.14, "learning_rate": 2.614069998829451e-07, "logits/chosen": -2.4227356910705566, "logits/rejected": -2.388801097869873, "logps/chosen": -270.2369689941406, "logps/rejected": -399.65130615234375, "loss": 0.3958, "rewards/accuracies": 0.875, "rewards/chosen": 0.102063849568367, "rewards/margins": 1.264222502708435, "rewards/rejected": -1.162158727645874, "step": 1230 }, { "epoch": 0.14, "learning_rate": 2.6137188341332086e-07, "logits/chosen": -2.8920364379882812, "logits/rejected": -2.704340696334839, "logps/chosen": -325.8037414550781, "logps/rejected": -313.0450134277344, "loss": 0.5686, "rewards/accuracies": 0.625, "rewards/chosen": -0.1999027281999588, "rewards/margins": 0.6791297793388367, "rewards/rejected": -0.8790324926376343, "step": 1231 }, { "epoch": 0.14, "learning_rate": 2.6133676694369656e-07, "logits/chosen": -3.270562171936035, "logits/rejected": -3.500222682952881, "logps/chosen": -230.20037841796875, "logps/rejected": -201.35707092285156, "loss": 0.5523, "rewards/accuracies": 0.75, "rewards/chosen": 0.18336951732635498, "rewards/margins": 0.8029341101646423, "rewards/rejected": -0.6195645928382874, "step": 1232 }, { "epoch": 0.14, "learning_rate": 2.613016504740723e-07, "logits/chosen": -2.02183198928833, "logits/rejected": -2.1559906005859375, "logps/chosen": -188.16981506347656, "logps/rejected": -255.16818237304688, "loss": 0.6533, "rewards/accuracies": 0.75, "rewards/chosen": -0.26096677780151367, "rewards/margins": 0.3351914882659912, "rewards/rejected": -0.5961582064628601, "step": 1233 }, { "epoch": 0.14, "learning_rate": 2.6126653400444807e-07, "logits/chosen": -2.315589427947998, "logits/rejected": -2.302199363708496, "logps/chosen": -316.6741027832031, "logps/rejected": -372.4480285644531, "loss": 0.6047, "rewards/accuracies": 0.75, "rewards/chosen": -0.22616541385650635, "rewards/margins": 1.6129578351974487, "rewards/rejected": -1.839123249053955, "step": 1234 }, { "epoch": 0.14, "learning_rate": 2.612314175348238e-07, "logits/chosen": -3.4977946281433105, "logits/rejected": -3.2461538314819336, "logps/chosen": -214.875244140625, "logps/rejected": -199.24325561523438, "loss": 0.9586, "rewards/accuracies": 0.25, "rewards/chosen": -0.1848311722278595, "rewards/margins": -0.42164939641952515, "rewards/rejected": 0.23681819438934326, "step": 1235 }, { "epoch": 0.14, "learning_rate": 2.611963010651996e-07, "logits/chosen": -3.2507569789886475, "logits/rejected": -3.3427178859710693, "logps/chosen": -204.02667236328125, "logps/rejected": -211.50567626953125, "loss": 0.3064, "rewards/accuracies": 0.75, "rewards/chosen": 0.24388472735881805, "rewards/margins": 1.5830415487289429, "rewards/rejected": -1.3391568660736084, "step": 1236 }, { "epoch": 0.14, "learning_rate": 2.611611845955753e-07, "logits/chosen": -2.399153709411621, "logits/rejected": -2.4923818111419678, "logps/chosen": -231.37274169921875, "logps/rejected": -296.00885009765625, "loss": 0.4762, "rewards/accuracies": 0.625, "rewards/chosen": 0.04505304992198944, "rewards/margins": 1.7013713121414185, "rewards/rejected": -1.656318187713623, "step": 1237 }, { "epoch": 0.14, "learning_rate": 2.611260681259511e-07, "logits/chosen": -2.4996585845947266, "logits/rejected": -2.37514328956604, "logps/chosen": -433.9339599609375, "logps/rejected": -389.885498046875, "loss": 0.225, "rewards/accuracies": 0.875, "rewards/chosen": 0.4906667470932007, "rewards/margins": 1.8463754653930664, "rewards/rejected": -1.3557087182998657, "step": 1238 }, { "epoch": 0.14, "learning_rate": 2.6109095165632684e-07, "logits/chosen": -2.565741539001465, "logits/rejected": -2.809699535369873, "logps/chosen": -286.50823974609375, "logps/rejected": -229.80722045898438, "loss": 0.2866, "rewards/accuracies": 0.875, "rewards/chosen": 0.455525279045105, "rewards/margins": 1.3931748867034912, "rewards/rejected": -0.9376495480537415, "step": 1239 }, { "epoch": 0.14, "learning_rate": 2.6105583518670254e-07, "logits/chosen": -3.428104877471924, "logits/rejected": -3.4822864532470703, "logps/chosen": -229.9440460205078, "logps/rejected": -224.49395751953125, "loss": 0.4375, "rewards/accuracies": 0.75, "rewards/chosen": -0.12973244488239288, "rewards/margins": 1.756459355354309, "rewards/rejected": -1.8861918449401855, "step": 1240 }, { "epoch": 0.14, "learning_rate": 2.610207187170783e-07, "logits/chosen": -2.95914626121521, "logits/rejected": -2.8100621700286865, "logps/chosen": -301.2416687011719, "logps/rejected": -291.8368835449219, "loss": 0.3733, "rewards/accuracies": 1.0, "rewards/chosen": -0.016038358211517334, "rewards/margins": 0.9308395385742188, "rewards/rejected": -0.9468779563903809, "step": 1241 }, { "epoch": 0.14, "learning_rate": 2.6098560224745405e-07, "logits/chosen": -3.0953927040100098, "logits/rejected": -3.2632038593292236, "logps/chosen": -72.46780395507812, "logps/rejected": -128.88055419921875, "loss": 0.3498, "rewards/accuracies": 0.875, "rewards/chosen": 0.19745410978794098, "rewards/margins": 1.226307988166809, "rewards/rejected": -1.0288538932800293, "step": 1242 }, { "epoch": 0.14, "learning_rate": 2.609504857778298e-07, "logits/chosen": -2.893667697906494, "logits/rejected": -3.084047555923462, "logps/chosen": -192.5306396484375, "logps/rejected": -212.5204620361328, "loss": 0.3174, "rewards/accuracies": 0.75, "rewards/chosen": 0.16227740049362183, "rewards/margins": 2.066342353820801, "rewards/rejected": -1.9040648937225342, "step": 1243 }, { "epoch": 0.14, "learning_rate": 2.6091536930820556e-07, "logits/chosen": -3.1496381759643555, "logits/rejected": -3.1395297050476074, "logps/chosen": -405.1414489746094, "logps/rejected": -362.4064025878906, "loss": 0.3606, "rewards/accuracies": 0.875, "rewards/chosen": -0.10727101564407349, "rewards/margins": 1.2385269403457642, "rewards/rejected": -1.3457978963851929, "step": 1244 }, { "epoch": 0.14, "learning_rate": 2.6088025283858126e-07, "logits/chosen": -3.751851797103882, "logits/rejected": -3.6648006439208984, "logps/chosen": -351.31402587890625, "logps/rejected": -265.43951416015625, "loss": 0.5905, "rewards/accuracies": 0.625, "rewards/chosen": 0.2989659905433655, "rewards/margins": 0.8734144568443298, "rewards/rejected": -0.5744484663009644, "step": 1245 }, { "epoch": 0.14, "learning_rate": 2.60845136368957e-07, "logits/chosen": -2.638249635696411, "logits/rejected": -2.515268087387085, "logps/chosen": -122.7892074584961, "logps/rejected": -190.04763793945312, "loss": 0.3774, "rewards/accuracies": 0.875, "rewards/chosen": 0.11463932693004608, "rewards/margins": 1.002360224723816, "rewards/rejected": -0.8877209424972534, "step": 1246 }, { "epoch": 0.14, "learning_rate": 2.6081001989933277e-07, "logits/chosen": -2.860322952270508, "logits/rejected": -3.108102798461914, "logps/chosen": -283.07989501953125, "logps/rejected": -455.01971435546875, "loss": 0.4285, "rewards/accuracies": 0.75, "rewards/chosen": -0.06789693981409073, "rewards/margins": 1.4148297309875488, "rewards/rejected": -1.4827266931533813, "step": 1247 }, { "epoch": 0.14, "learning_rate": 2.607749034297085e-07, "logits/chosen": -3.7937490940093994, "logits/rejected": -3.579148054122925, "logps/chosen": -134.30584716796875, "logps/rejected": -144.80386352539062, "loss": 0.65, "rewards/accuracies": 0.75, "rewards/chosen": 0.28740403056144714, "rewards/margins": 1.2118487358093262, "rewards/rejected": -0.9244446754455566, "step": 1248 }, { "epoch": 0.14, "learning_rate": 2.6073978696008427e-07, "logits/chosen": -2.1343436241149902, "logits/rejected": -2.459807872772217, "logps/chosen": -299.5708312988281, "logps/rejected": -276.59307861328125, "loss": 0.3958, "rewards/accuracies": 0.875, "rewards/chosen": 0.04353836178779602, "rewards/margins": 1.7347865104675293, "rewards/rejected": -1.6912481784820557, "step": 1249 }, { "epoch": 0.14, "learning_rate": 2.6070467049046e-07, "logits/chosen": -3.473194122314453, "logits/rejected": -3.427959442138672, "logps/chosen": -139.63751220703125, "logps/rejected": -196.62179565429688, "loss": 0.2858, "rewards/accuracies": 0.875, "rewards/chosen": 0.17462079226970673, "rewards/margins": 1.8477861881256104, "rewards/rejected": -1.6731654405593872, "step": 1250 }, { "epoch": 0.14, "learning_rate": 2.606695540208358e-07, "logits/chosen": -2.88488507270813, "logits/rejected": -3.114806652069092, "logps/chosen": -192.2626190185547, "logps/rejected": -242.4342041015625, "loss": 0.571, "rewards/accuracies": 0.5, "rewards/chosen": 0.15763500332832336, "rewards/margins": 1.1480674743652344, "rewards/rejected": -0.9904325008392334, "step": 1251 }, { "epoch": 0.14, "learning_rate": 2.6063443755121153e-07, "logits/chosen": -3.074521064758301, "logits/rejected": -3.0216445922851562, "logps/chosen": -241.5642547607422, "logps/rejected": -258.04193115234375, "loss": 0.3361, "rewards/accuracies": 0.75, "rewards/chosen": 0.6394314169883728, "rewards/margins": 1.4470374584197998, "rewards/rejected": -0.8076058626174927, "step": 1252 }, { "epoch": 0.14, "learning_rate": 2.6059932108158724e-07, "logits/chosen": -3.411302328109741, "logits/rejected": -3.1982076168060303, "logps/chosen": -252.36891174316406, "logps/rejected": -222.7611541748047, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": 0.1281997710466385, "rewards/margins": 2.6057016849517822, "rewards/rejected": -2.477501630783081, "step": 1253 }, { "epoch": 0.14, "learning_rate": 2.60564204611963e-07, "logits/chosen": -2.6756086349487305, "logits/rejected": -2.9818902015686035, "logps/chosen": -335.9529724121094, "logps/rejected": -279.1679382324219, "loss": 0.415, "rewards/accuracies": 0.75, "rewards/chosen": 0.04776123911142349, "rewards/margins": 1.0915879011154175, "rewards/rejected": -1.043826699256897, "step": 1254 }, { "epoch": 0.14, "learning_rate": 2.6052908814233874e-07, "logits/chosen": -2.853520154953003, "logits/rejected": -2.442591667175293, "logps/chosen": -322.25640869140625, "logps/rejected": -362.79144287109375, "loss": 0.422, "rewards/accuracies": 0.75, "rewards/chosen": 0.4186449944972992, "rewards/margins": 1.2126481533050537, "rewards/rejected": -0.7940032482147217, "step": 1255 }, { "epoch": 0.14, "learning_rate": 2.604939716727145e-07, "logits/chosen": -2.764824628829956, "logits/rejected": -2.6589951515197754, "logps/chosen": -292.8194580078125, "logps/rejected": -321.891845703125, "loss": 0.8181, "rewards/accuracies": 0.625, "rewards/chosen": -0.3818066716194153, "rewards/margins": 1.3206005096435547, "rewards/rejected": -1.7024071216583252, "step": 1256 }, { "epoch": 0.14, "learning_rate": 2.6045885520309025e-07, "logits/chosen": -2.8895866870880127, "logits/rejected": -2.912273406982422, "logps/chosen": -376.0956726074219, "logps/rejected": -214.6659393310547, "loss": 0.7383, "rewards/accuracies": 0.5, "rewards/chosen": -0.23697806894779205, "rewards/margins": 0.4268755316734314, "rewards/rejected": -0.663853645324707, "step": 1257 }, { "epoch": 0.15, "learning_rate": 2.6042373873346595e-07, "logits/chosen": -3.118597984313965, "logits/rejected": -3.0233066082000732, "logps/chosen": -234.56399536132812, "logps/rejected": -260.0751953125, "loss": 0.9664, "rewards/accuracies": 0.5, "rewards/chosen": -0.4169316291809082, "rewards/margins": -0.28791970014572144, "rewards/rejected": -0.12901189923286438, "step": 1258 }, { "epoch": 0.15, "learning_rate": 2.603886222638417e-07, "logits/chosen": -4.106902599334717, "logits/rejected": -3.9498696327209473, "logps/chosen": -203.10264587402344, "logps/rejected": -171.9250946044922, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": 0.06892501562833786, "rewards/margins": 0.9735456109046936, "rewards/rejected": -0.9046206474304199, "step": 1259 }, { "epoch": 0.15, "learning_rate": 2.603535057942175e-07, "logits/chosen": -3.2452073097229004, "logits/rejected": -2.8832969665527344, "logps/chosen": -532.1001586914062, "logps/rejected": -245.87144470214844, "loss": 0.6972, "rewards/accuracies": 0.625, "rewards/chosen": -0.21020887792110443, "rewards/margins": 1.137298822402954, "rewards/rejected": -1.3475077152252197, "step": 1260 }, { "epoch": 0.15, "learning_rate": 2.603183893245932e-07, "logits/chosen": -4.053818702697754, "logits/rejected": -3.4232513904571533, "logps/chosen": -293.766845703125, "logps/rejected": -209.13522338867188, "loss": 0.458, "rewards/accuracies": 0.75, "rewards/chosen": -0.4996156692504883, "rewards/margins": 0.8672517538070679, "rewards/rejected": -1.3668673038482666, "step": 1261 }, { "epoch": 0.15, "learning_rate": 2.6028327285496897e-07, "logits/chosen": -2.867202043533325, "logits/rejected": -2.9406702518463135, "logps/chosen": -255.1750030517578, "logps/rejected": -320.5224914550781, "loss": 0.3698, "rewards/accuracies": 0.75, "rewards/chosen": -0.3969117999076843, "rewards/margins": 1.4228506088256836, "rewards/rejected": -1.8197624683380127, "step": 1262 }, { "epoch": 0.15, "learning_rate": 2.602481563853447e-07, "logits/chosen": -3.0925660133361816, "logits/rejected": -3.043455123901367, "logps/chosen": -124.40054321289062, "logps/rejected": -154.558837890625, "loss": 0.7198, "rewards/accuracies": 0.75, "rewards/chosen": -0.5317804217338562, "rewards/margins": 0.24838754534721375, "rewards/rejected": -0.7801679968833923, "step": 1263 }, { "epoch": 0.15, "learning_rate": 2.602130399157205e-07, "logits/chosen": -3.5569663047790527, "logits/rejected": -3.1190528869628906, "logps/chosen": -251.91822814941406, "logps/rejected": -229.67767333984375, "loss": 0.563, "rewards/accuracies": 0.75, "rewards/chosen": 0.0971226766705513, "rewards/margins": 0.9962552785873413, "rewards/rejected": -0.8991326093673706, "step": 1264 }, { "epoch": 0.15, "learning_rate": 2.6017792344609623e-07, "logits/chosen": -3.1028783321380615, "logits/rejected": -3.1372337341308594, "logps/chosen": -263.86541748046875, "logps/rejected": -294.6171875, "loss": 0.3522, "rewards/accuracies": 1.0, "rewards/chosen": -0.3497154414653778, "rewards/margins": 1.131418228149414, "rewards/rejected": -1.4811336994171143, "step": 1265 }, { "epoch": 0.15, "learning_rate": 2.6014280697647193e-07, "logits/chosen": -3.4317240715026855, "logits/rejected": -2.9669342041015625, "logps/chosen": -421.86822509765625, "logps/rejected": -480.8500671386719, "loss": 0.3302, "rewards/accuracies": 0.75, "rewards/chosen": 0.16947278380393982, "rewards/margins": 2.216059684753418, "rewards/rejected": -2.0465869903564453, "step": 1266 }, { "epoch": 0.15, "learning_rate": 2.601076905068477e-07, "logits/chosen": -2.837587594985962, "logits/rejected": -2.6420607566833496, "logps/chosen": -315.09417724609375, "logps/rejected": -285.3146057128906, "loss": 0.3883, "rewards/accuracies": 0.75, "rewards/chosen": 0.1792723685503006, "rewards/margins": 1.0624361038208008, "rewards/rejected": -0.8831638097763062, "step": 1267 }, { "epoch": 0.15, "learning_rate": 2.6007257403722344e-07, "logits/chosen": -3.2752652168273926, "logits/rejected": -3.2611289024353027, "logps/chosen": -343.8131103515625, "logps/rejected": -227.1166534423828, "loss": 0.4165, "rewards/accuracies": 0.875, "rewards/chosen": -0.2622881829738617, "rewards/margins": 1.7003253698349, "rewards/rejected": -1.962613582611084, "step": 1268 }, { "epoch": 0.15, "learning_rate": 2.600374575675992e-07, "logits/chosen": -3.229914665222168, "logits/rejected": -3.509070634841919, "logps/chosen": -512.1622314453125, "logps/rejected": -375.2642822265625, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": 0.022244591265916824, "rewards/margins": 2.8704724311828613, "rewards/rejected": -2.8482279777526855, "step": 1269 }, { "epoch": 0.15, "learning_rate": 2.6000234109797495e-07, "logits/chosen": -2.4674558639526367, "logits/rejected": -2.614603042602539, "logps/chosen": -404.96197509765625, "logps/rejected": -350.16046142578125, "loss": 0.5131, "rewards/accuracies": 0.75, "rewards/chosen": -0.17047154903411865, "rewards/margins": 0.685597836971283, "rewards/rejected": -0.8560694456100464, "step": 1270 }, { "epoch": 0.15, "learning_rate": 2.599672246283507e-07, "logits/chosen": -3.1197214126586914, "logits/rejected": -3.0060253143310547, "logps/chosen": -366.09332275390625, "logps/rejected": -249.36624145507812, "loss": 0.3422, "rewards/accuracies": 0.75, "rewards/chosen": 0.1908625066280365, "rewards/margins": 1.33160400390625, "rewards/rejected": -1.1407414674758911, "step": 1271 }, { "epoch": 0.15, "learning_rate": 2.5993210815872645e-07, "logits/chosen": -2.741081476211548, "logits/rejected": -2.368004322052002, "logps/chosen": -334.4292297363281, "logps/rejected": -326.1259765625, "loss": 0.5526, "rewards/accuracies": 0.625, "rewards/chosen": -0.09303249418735504, "rewards/margins": 0.712805986404419, "rewards/rejected": -0.8058385252952576, "step": 1272 }, { "epoch": 0.15, "learning_rate": 2.598969916891022e-07, "logits/chosen": -2.9212374687194824, "logits/rejected": -2.659376621246338, "logps/chosen": -282.4862060546875, "logps/rejected": -204.09007263183594, "loss": 0.4624, "rewards/accuracies": 0.75, "rewards/chosen": 0.15247690677642822, "rewards/margins": 1.0850062370300293, "rewards/rejected": -0.9325292110443115, "step": 1273 }, { "epoch": 0.15, "learning_rate": 2.598618752194779e-07, "logits/chosen": -2.710968255996704, "logits/rejected": -2.8165411949157715, "logps/chosen": -232.6937255859375, "logps/rejected": -423.9354553222656, "loss": 0.3082, "rewards/accuracies": 0.875, "rewards/chosen": 0.05913534015417099, "rewards/margins": 2.411250114440918, "rewards/rejected": -2.352114677429199, "step": 1274 }, { "epoch": 0.15, "learning_rate": 2.5982675874985366e-07, "logits/chosen": -3.1423983573913574, "logits/rejected": -3.17585825920105, "logps/chosen": -117.81981658935547, "logps/rejected": -206.79238891601562, "loss": 0.3499, "rewards/accuracies": 0.875, "rewards/chosen": -0.3020673394203186, "rewards/margins": 1.8536884784698486, "rewards/rejected": -2.1557559967041016, "step": 1275 }, { "epoch": 0.15, "learning_rate": 2.597916422802294e-07, "logits/chosen": -2.9921231269836426, "logits/rejected": -3.17488956451416, "logps/chosen": -207.14071655273438, "logps/rejected": -308.4000549316406, "loss": 0.2779, "rewards/accuracies": 0.875, "rewards/chosen": -0.03297881782054901, "rewards/margins": 1.7886940240859985, "rewards/rejected": -1.8216726779937744, "step": 1276 }, { "epoch": 0.15, "learning_rate": 2.5975652581060517e-07, "logits/chosen": -3.2946152687072754, "logits/rejected": -2.874213218688965, "logps/chosen": -382.6255798339844, "logps/rejected": -251.70358276367188, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": 0.16489487886428833, "rewards/margins": 1.3828957080841064, "rewards/rejected": -1.218000888824463, "step": 1277 }, { "epoch": 0.15, "learning_rate": 2.597214093409809e-07, "logits/chosen": -3.8209757804870605, "logits/rejected": -3.94968843460083, "logps/chosen": -166.04222106933594, "logps/rejected": -219.08106994628906, "loss": 0.3817, "rewards/accuracies": 0.875, "rewards/chosen": -0.0673372820019722, "rewards/margins": 1.873173475265503, "rewards/rejected": -1.940510630607605, "step": 1278 }, { "epoch": 0.15, "learning_rate": 2.596862928713566e-07, "logits/chosen": -2.888946533203125, "logits/rejected": -2.8783323764801025, "logps/chosen": -151.70892333984375, "logps/rejected": -185.6434783935547, "loss": 0.5683, "rewards/accuracies": 0.625, "rewards/chosen": 0.04054642841219902, "rewards/margins": 0.4204131066799164, "rewards/rejected": -0.37986665964126587, "step": 1279 }, { "epoch": 0.15, "learning_rate": 2.596511764017324e-07, "logits/chosen": -2.5820536613464355, "logits/rejected": -2.842998504638672, "logps/chosen": -345.49261474609375, "logps/rejected": -290.9452819824219, "loss": 0.3511, "rewards/accuracies": 1.0, "rewards/chosen": -0.11564323306083679, "rewards/margins": 1.0789257287979126, "rewards/rejected": -1.1945688724517822, "step": 1280 }, { "epoch": 0.15, "learning_rate": 2.5961605993210813e-07, "logits/chosen": -3.3668510913848877, "logits/rejected": -3.2866597175598145, "logps/chosen": -383.4378662109375, "logps/rejected": -355.94366455078125, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 0.30375710129737854, "rewards/margins": 4.16273832321167, "rewards/rejected": -3.8589813709259033, "step": 1281 }, { "epoch": 0.15, "learning_rate": 2.595809434624839e-07, "logits/chosen": -3.7810850143432617, "logits/rejected": -3.6944327354431152, "logps/chosen": -299.6083984375, "logps/rejected": -158.12698364257812, "loss": 0.9182, "rewards/accuracies": 0.5, "rewards/chosen": -0.8842620849609375, "rewards/margins": 0.2196952998638153, "rewards/rejected": -1.1039574146270752, "step": 1282 }, { "epoch": 0.15, "learning_rate": 2.5954582699285964e-07, "logits/chosen": -3.3617372512817383, "logits/rejected": -3.576263666152954, "logps/chosen": -111.85790252685547, "logps/rejected": -132.98446655273438, "loss": 0.4583, "rewards/accuracies": 0.875, "rewards/chosen": 0.1069284975528717, "rewards/margins": 0.9012032747268677, "rewards/rejected": -0.7942748069763184, "step": 1283 }, { "epoch": 0.15, "learning_rate": 2.595107105232354e-07, "logits/chosen": -3.6058006286621094, "logits/rejected": -3.1365981101989746, "logps/chosen": -241.54397583007812, "logps/rejected": -213.15835571289062, "loss": 0.6164, "rewards/accuracies": 0.75, "rewards/chosen": -0.35670149326324463, "rewards/margins": 1.0320377349853516, "rewards/rejected": -1.3887391090393066, "step": 1284 }, { "epoch": 0.15, "learning_rate": 2.5947559405361115e-07, "logits/chosen": -2.5054614543914795, "logits/rejected": -2.9064981937408447, "logps/chosen": -282.3585205078125, "logps/rejected": -191.29638671875, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": 0.6501674056053162, "rewards/margins": 2.5472280979156494, "rewards/rejected": -1.897060751914978, "step": 1285 }, { "epoch": 0.15, "learning_rate": 2.594404775839869e-07, "logits/chosen": -3.532264232635498, "logits/rejected": -3.5364441871643066, "logps/chosen": -206.72787475585938, "logps/rejected": -194.4606170654297, "loss": 0.5487, "rewards/accuracies": 0.625, "rewards/chosen": -0.23328472673892975, "rewards/margins": 0.772308886051178, "rewards/rejected": -1.0055935382843018, "step": 1286 }, { "epoch": 0.15, "learning_rate": 2.594053611143626e-07, "logits/chosen": -3.2990620136260986, "logits/rejected": -3.459531307220459, "logps/chosen": -245.9703369140625, "logps/rejected": -217.69302368164062, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": 0.08416178077459335, "rewards/margins": 0.6740147471427917, "rewards/rejected": -0.5898529887199402, "step": 1287 }, { "epoch": 0.15, "learning_rate": 2.5937024464473836e-07, "logits/chosen": -4.074586868286133, "logits/rejected": -4.008978843688965, "logps/chosen": -164.36434936523438, "logps/rejected": -130.31381225585938, "loss": 0.5328, "rewards/accuracies": 0.625, "rewards/chosen": -0.17911836504936218, "rewards/margins": 1.4981013536453247, "rewards/rejected": -1.6772196292877197, "step": 1288 }, { "epoch": 0.15, "learning_rate": 2.593351281751141e-07, "logits/chosen": -2.689720392227173, "logits/rejected": -2.728484630584717, "logps/chosen": -291.118896484375, "logps/rejected": -225.03164672851562, "loss": 0.718, "rewards/accuracies": 0.625, "rewards/chosen": -0.2973710894584656, "rewards/margins": 0.5966485738754272, "rewards/rejected": -0.8940197825431824, "step": 1289 }, { "epoch": 0.15, "learning_rate": 2.5930001170548986e-07, "logits/chosen": -3.393251657485962, "logits/rejected": -3.3632326126098633, "logps/chosen": -271.45233154296875, "logps/rejected": -283.763916015625, "loss": 0.3478, "rewards/accuracies": 0.875, "rewards/chosen": 0.07456254959106445, "rewards/margins": 1.062697410583496, "rewards/rejected": -0.9881348609924316, "step": 1290 }, { "epoch": 0.15, "learning_rate": 2.592648952358656e-07, "logits/chosen": -3.6126067638397217, "logits/rejected": -3.651167392730713, "logps/chosen": -255.2904052734375, "logps/rejected": -241.7370147705078, "loss": 0.4147, "rewards/accuracies": 0.875, "rewards/chosen": 0.2138913869857788, "rewards/margins": 2.8744282722473145, "rewards/rejected": -2.660537004470825, "step": 1291 }, { "epoch": 0.15, "learning_rate": 2.5922977876624137e-07, "logits/chosen": -2.827080726623535, "logits/rejected": -3.1792807579040527, "logps/chosen": -296.0008239746094, "logps/rejected": -284.73541259765625, "loss": 0.3339, "rewards/accuracies": 0.75, "rewards/chosen": -0.0038594603538513184, "rewards/margins": 1.9282746315002441, "rewards/rejected": -1.9321341514587402, "step": 1292 }, { "epoch": 0.15, "learning_rate": 2.5919466229661707e-07, "logits/chosen": -2.8907952308654785, "logits/rejected": -2.895859718322754, "logps/chosen": -211.64186096191406, "logps/rejected": -241.32791137695312, "loss": 0.5372, "rewards/accuracies": 0.875, "rewards/chosen": -0.3302392363548279, "rewards/margins": 0.5769890546798706, "rewards/rejected": -0.9072282910346985, "step": 1293 }, { "epoch": 0.15, "learning_rate": 2.591595458269929e-07, "logits/chosen": -3.444784641265869, "logits/rejected": -3.0962746143341064, "logps/chosen": -291.75152587890625, "logps/rejected": -136.48199462890625, "loss": 0.8326, "rewards/accuracies": 0.625, "rewards/chosen": -0.4753166139125824, "rewards/margins": 0.36070555448532104, "rewards/rejected": -0.8360220789909363, "step": 1294 }, { "epoch": 0.15, "learning_rate": 2.591244293573686e-07, "logits/chosen": -3.5744259357452393, "logits/rejected": -3.6351099014282227, "logps/chosen": -230.36766052246094, "logps/rejected": -158.3636932373047, "loss": 0.5953, "rewards/accuracies": 0.625, "rewards/chosen": -0.2603738307952881, "rewards/margins": 0.6703643798828125, "rewards/rejected": -0.9307381510734558, "step": 1295 }, { "epoch": 0.15, "learning_rate": 2.5908931288774433e-07, "logits/chosen": -2.756112813949585, "logits/rejected": -2.728919267654419, "logps/chosen": -330.05670166015625, "logps/rejected": -325.7234191894531, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": 0.4362429082393646, "rewards/margins": 1.570178508758545, "rewards/rejected": -1.133935570716858, "step": 1296 }, { "epoch": 0.15, "learning_rate": 2.590541964181201e-07, "logits/chosen": -3.059100866317749, "logits/rejected": -3.200026512145996, "logps/chosen": -214.97756958007812, "logps/rejected": -276.8621826171875, "loss": 0.4629, "rewards/accuracies": 0.75, "rewards/chosen": -0.15328067541122437, "rewards/margins": 0.8713489770889282, "rewards/rejected": -1.0246297121047974, "step": 1297 }, { "epoch": 0.15, "learning_rate": 2.5901907994849584e-07, "logits/chosen": -3.480208396911621, "logits/rejected": -3.6882548332214355, "logps/chosen": -293.0123291015625, "logps/rejected": -277.376220703125, "loss": 0.7636, "rewards/accuracies": 0.875, "rewards/chosen": -0.4127768874168396, "rewards/margins": 1.7253131866455078, "rewards/rejected": -2.138090133666992, "step": 1298 }, { "epoch": 0.15, "learning_rate": 2.589839634788716e-07, "logits/chosen": -2.7288150787353516, "logits/rejected": -2.754058361053467, "logps/chosen": -272.82305908203125, "logps/rejected": -338.906494140625, "loss": 0.5749, "rewards/accuracies": 0.75, "rewards/chosen": -0.5219551920890808, "rewards/margins": 1.0228451490402222, "rewards/rejected": -1.5448004007339478, "step": 1299 }, { "epoch": 0.15, "learning_rate": 2.5894884700924735e-07, "logits/chosen": -3.9196786880493164, "logits/rejected": -4.041879653930664, "logps/chosen": -107.51551055908203, "logps/rejected": -183.47714233398438, "loss": 0.6468, "rewards/accuracies": 0.75, "rewards/chosen": -0.284287691116333, "rewards/margins": 1.2184343338012695, "rewards/rejected": -1.502721905708313, "step": 1300 }, { "epoch": 0.15, "learning_rate": 2.5891373053962305e-07, "logits/chosen": -2.800198793411255, "logits/rejected": -2.808199644088745, "logps/chosen": -212.77622985839844, "logps/rejected": -249.2012939453125, "loss": 0.7444, "rewards/accuracies": 0.375, "rewards/chosen": -0.19236822426319122, "rewards/margins": 0.7992057204246521, "rewards/rejected": -0.9915739297866821, "step": 1301 }, { "epoch": 0.15, "learning_rate": 2.588786140699988e-07, "logits/chosen": -3.0747416019439697, "logits/rejected": -3.10546612739563, "logps/chosen": -263.40899658203125, "logps/rejected": -244.4541015625, "loss": 0.3898, "rewards/accuracies": 1.0, "rewards/chosen": -0.15884999930858612, "rewards/margins": 1.0069949626922607, "rewards/rejected": -1.1658449172973633, "step": 1302 }, { "epoch": 0.15, "learning_rate": 2.5884349760037456e-07, "logits/chosen": -2.9573512077331543, "logits/rejected": -3.1573257446289062, "logps/chosen": -255.2872314453125, "logps/rejected": -378.9390869140625, "loss": 0.5537, "rewards/accuracies": 0.75, "rewards/chosen": -0.1679532527923584, "rewards/margins": 1.058525800704956, "rewards/rejected": -1.2264790534973145, "step": 1303 }, { "epoch": 0.15, "learning_rate": 2.588083811307503e-07, "logits/chosen": -3.092365264892578, "logits/rejected": -2.8742856979370117, "logps/chosen": -253.04208374023438, "logps/rejected": -148.779296875, "loss": 0.4893, "rewards/accuracies": 0.875, "rewards/chosen": 0.06802567839622498, "rewards/margins": 1.276638388633728, "rewards/rejected": -1.2086126804351807, "step": 1304 }, { "epoch": 0.15, "learning_rate": 2.5877326466112607e-07, "logits/chosen": -3.113018751144409, "logits/rejected": -3.186005115509033, "logps/chosen": -138.67953491210938, "logps/rejected": -145.1586456298828, "loss": 0.478, "rewards/accuracies": 0.75, "rewards/chosen": 0.09670032560825348, "rewards/margins": 0.9930595755577087, "rewards/rejected": -0.8963592648506165, "step": 1305 }, { "epoch": 0.15, "learning_rate": 2.587381481915018e-07, "logits/chosen": -3.447582483291626, "logits/rejected": -3.4858503341674805, "logps/chosen": -192.93130493164062, "logps/rejected": -187.5643310546875, "loss": 0.3989, "rewards/accuracies": 0.875, "rewards/chosen": 0.3504765033721924, "rewards/margins": 0.9960402250289917, "rewards/rejected": -0.6455637216567993, "step": 1306 }, { "epoch": 0.15, "learning_rate": 2.5870303172187757e-07, "logits/chosen": -3.5059256553649902, "logits/rejected": -3.4606306552886963, "logps/chosen": -305.87811279296875, "logps/rejected": -251.9505157470703, "loss": 0.4216, "rewards/accuracies": 0.75, "rewards/chosen": -0.41335949301719666, "rewards/margins": 1.1022999286651611, "rewards/rejected": -1.5156594514846802, "step": 1307 }, { "epoch": 0.15, "learning_rate": 2.5866791525225333e-07, "logits/chosen": -3.2620432376861572, "logits/rejected": -3.291475296020508, "logps/chosen": -356.01129150390625, "logps/rejected": -222.39328002929688, "loss": 0.4457, "rewards/accuracies": 1.0, "rewards/chosen": 0.15751652419567108, "rewards/margins": 0.9991155862808228, "rewards/rejected": -0.8415990471839905, "step": 1308 }, { "epoch": 0.15, "learning_rate": 2.5863279878262903e-07, "logits/chosen": -2.914588212966919, "logits/rejected": -2.7374255657196045, "logps/chosen": -341.7193603515625, "logps/rejected": -304.9005432128906, "loss": 0.4779, "rewards/accuracies": 0.75, "rewards/chosen": 0.10665670782327652, "rewards/margins": 1.4474871158599854, "rewards/rejected": -1.3408302068710327, "step": 1309 }, { "epoch": 0.15, "learning_rate": 2.585976823130048e-07, "logits/chosen": -3.4403414726257324, "logits/rejected": -3.1371870040893555, "logps/chosen": -153.07095336914062, "logps/rejected": -145.9384002685547, "loss": 0.4955, "rewards/accuracies": 0.75, "rewards/chosen": -0.6596205234527588, "rewards/margins": 0.7322850823402405, "rewards/rejected": -1.3919057846069336, "step": 1310 }, { "epoch": 0.15, "learning_rate": 2.5856256584338054e-07, "logits/chosen": -3.120992422103882, "logits/rejected": -3.071319341659546, "logps/chosen": -242.12005615234375, "logps/rejected": -289.5521240234375, "loss": 0.5491, "rewards/accuracies": 0.625, "rewards/chosen": 0.10733366012573242, "rewards/margins": 0.7503787279129028, "rewards/rejected": -0.6430450081825256, "step": 1311 }, { "epoch": 0.15, "learning_rate": 2.585274493737563e-07, "logits/chosen": -2.70572829246521, "logits/rejected": -2.6100759506225586, "logps/chosen": -345.95416259765625, "logps/rejected": -226.0701446533203, "loss": 0.2302, "rewards/accuracies": 1.0, "rewards/chosen": 0.551724910736084, "rewards/margins": 1.677099585533142, "rewards/rejected": -1.1253745555877686, "step": 1312 }, { "epoch": 0.15, "learning_rate": 2.5849233290413204e-07, "logits/chosen": -2.434321641921997, "logits/rejected": -2.4609122276306152, "logps/chosen": -336.62823486328125, "logps/rejected": -291.9024658203125, "loss": 0.3314, "rewards/accuracies": 1.0, "rewards/chosen": -0.06507334858179092, "rewards/margins": 1.4492993354797363, "rewards/rejected": -1.51437246799469, "step": 1313 }, { "epoch": 0.15, "learning_rate": 2.5845721643450774e-07, "logits/chosen": -3.1006526947021484, "logits/rejected": -2.8798556327819824, "logps/chosen": -343.8378601074219, "logps/rejected": -280.94281005859375, "loss": 0.4671, "rewards/accuracies": 0.875, "rewards/chosen": 0.025163792073726654, "rewards/margins": 1.2496585845947266, "rewards/rejected": -1.2244948148727417, "step": 1314 }, { "epoch": 0.15, "learning_rate": 2.584220999648835e-07, "logits/chosen": -2.8396191596984863, "logits/rejected": -3.412388801574707, "logps/chosen": -134.468505859375, "logps/rejected": -190.02305603027344, "loss": 0.4279, "rewards/accuracies": 0.75, "rewards/chosen": 0.3174448609352112, "rewards/margins": 1.9064357280731201, "rewards/rejected": -1.5889910459518433, "step": 1315 }, { "epoch": 0.15, "learning_rate": 2.583869834952593e-07, "logits/chosen": -3.92814302444458, "logits/rejected": -3.6312851905822754, "logps/chosen": -235.47775268554688, "logps/rejected": -195.09982299804688, "loss": 0.4433, "rewards/accuracies": 0.75, "rewards/chosen": 0.1444067656993866, "rewards/margins": 0.8122756481170654, "rewards/rejected": -0.6678689122200012, "step": 1316 }, { "epoch": 0.15, "learning_rate": 2.58351867025635e-07, "logits/chosen": -2.1425931453704834, "logits/rejected": -2.3161802291870117, "logps/chosen": -238.81643676757812, "logps/rejected": -198.32586669921875, "loss": 0.4758, "rewards/accuracies": 0.75, "rewards/chosen": -0.1310850977897644, "rewards/margins": 1.0565693378448486, "rewards/rejected": -1.1876544952392578, "step": 1317 }, { "epoch": 0.15, "learning_rate": 2.5831675055601076e-07, "logits/chosen": -3.299617290496826, "logits/rejected": -3.3087098598480225, "logps/chosen": -144.30484008789062, "logps/rejected": -235.2721405029297, "loss": 0.2335, "rewards/accuracies": 0.875, "rewards/chosen": 0.09746107459068298, "rewards/margins": 2.4311635494232178, "rewards/rejected": -2.333702325820923, "step": 1318 }, { "epoch": 0.15, "learning_rate": 2.582816340863865e-07, "logits/chosen": -3.2929329872131348, "logits/rejected": -3.379911422729492, "logps/chosen": -116.9150619506836, "logps/rejected": -212.86988830566406, "loss": 0.5589, "rewards/accuracies": 0.625, "rewards/chosen": 0.15315155684947968, "rewards/margins": 1.0505399703979492, "rewards/rejected": -0.8973883986473083, "step": 1319 }, { "epoch": 0.15, "learning_rate": 2.5824651761676227e-07, "logits/chosen": -3.469623565673828, "logits/rejected": -3.279270648956299, "logps/chosen": -231.5189208984375, "logps/rejected": -184.39744567871094, "loss": 0.5347, "rewards/accuracies": 0.75, "rewards/chosen": 0.04548273980617523, "rewards/margins": 0.6871821880340576, "rewards/rejected": -0.6416994333267212, "step": 1320 }, { "epoch": 0.15, "learning_rate": 2.58211401147138e-07, "logits/chosen": -3.0359675884246826, "logits/rejected": -3.128007650375366, "logps/chosen": -432.75286865234375, "logps/rejected": -339.6465148925781, "loss": 0.5621, "rewards/accuracies": 0.75, "rewards/chosen": 0.188251331448555, "rewards/margins": 0.9831944108009338, "rewards/rejected": -0.79494309425354, "step": 1321 }, { "epoch": 0.15, "learning_rate": 2.581762846775137e-07, "logits/chosen": -3.544379234313965, "logits/rejected": -3.136430263519287, "logps/chosen": -318.9771728515625, "logps/rejected": -327.9295654296875, "loss": 0.6057, "rewards/accuracies": 0.625, "rewards/chosen": -0.24966707825660706, "rewards/margins": 1.5483887195587158, "rewards/rejected": -1.7980557680130005, "step": 1322 }, { "epoch": 0.15, "learning_rate": 2.581411682078895e-07, "logits/chosen": -3.1763789653778076, "logits/rejected": -3.3069419860839844, "logps/chosen": -227.46932983398438, "logps/rejected": -166.3903045654297, "loss": 0.4, "rewards/accuracies": 0.875, "rewards/chosen": 0.08789289742708206, "rewards/margins": 1.0674495697021484, "rewards/rejected": -0.9795567393302917, "step": 1323 }, { "epoch": 0.15, "learning_rate": 2.5810605173826523e-07, "logits/chosen": -2.833911895751953, "logits/rejected": -2.856973648071289, "logps/chosen": -213.52345275878906, "logps/rejected": -294.8507080078125, "loss": 0.354, "rewards/accuracies": 0.875, "rewards/chosen": 0.01629640907049179, "rewards/margins": 1.7067673206329346, "rewards/rejected": -1.6904709339141846, "step": 1324 }, { "epoch": 0.15, "learning_rate": 2.58070935268641e-07, "logits/chosen": -2.3931326866149902, "logits/rejected": -2.4397151470184326, "logps/chosen": -218.00160217285156, "logps/rejected": -239.77919006347656, "loss": 0.4171, "rewards/accuracies": 0.875, "rewards/chosen": 0.08324562013149261, "rewards/margins": 1.0585038661956787, "rewards/rejected": -0.9752583503723145, "step": 1325 }, { "epoch": 0.15, "learning_rate": 2.5803581879901674e-07, "logits/chosen": -2.6797802448272705, "logits/rejected": -2.593536853790283, "logps/chosen": -241.52352905273438, "logps/rejected": -298.5032653808594, "loss": 0.3037, "rewards/accuracies": 0.875, "rewards/chosen": 0.1565411239862442, "rewards/margins": 1.6411848068237305, "rewards/rejected": -1.484643578529358, "step": 1326 }, { "epoch": 0.15, "learning_rate": 2.5800070232939244e-07, "logits/chosen": -3.2821755409240723, "logits/rejected": -3.0782220363616943, "logps/chosen": -233.2435302734375, "logps/rejected": -292.4256591796875, "loss": 0.449, "rewards/accuracies": 0.625, "rewards/chosen": -0.0727543756365776, "rewards/margins": 1.4862308502197266, "rewards/rejected": -1.5589852333068848, "step": 1327 }, { "epoch": 0.15, "learning_rate": 2.5796558585976825e-07, "logits/chosen": -3.5607621669769287, "logits/rejected": -3.2520854473114014, "logps/chosen": -451.402587890625, "logps/rejected": -259.1007385253906, "loss": 0.653, "rewards/accuracies": 0.5, "rewards/chosen": -0.12275351583957672, "rewards/margins": 0.33525529503822327, "rewards/rejected": -0.4580088257789612, "step": 1328 }, { "epoch": 0.15, "learning_rate": 2.57930469390144e-07, "logits/chosen": -3.107207775115967, "logits/rejected": -3.1797850131988525, "logps/chosen": -374.5860595703125, "logps/rejected": -537.3178100585938, "loss": 0.3995, "rewards/accuracies": 0.75, "rewards/chosen": 0.0880943238735199, "rewards/margins": 1.243980884552002, "rewards/rejected": -1.1558865308761597, "step": 1329 }, { "epoch": 0.15, "learning_rate": 2.578953529205197e-07, "logits/chosen": -2.8288464546203613, "logits/rejected": -2.8298370838165283, "logps/chosen": -220.2200927734375, "logps/rejected": -378.2925720214844, "loss": 0.4168, "rewards/accuracies": 0.75, "rewards/chosen": -0.34553563594818115, "rewards/margins": 1.497823715209961, "rewards/rejected": -1.843359351158142, "step": 1330 }, { "epoch": 0.15, "learning_rate": 2.5786023645089545e-07, "logits/chosen": -3.011458396911621, "logits/rejected": -2.9832024574279785, "logps/chosen": -194.14791870117188, "logps/rejected": -300.98291015625, "loss": 0.2893, "rewards/accuracies": 0.875, "rewards/chosen": -0.008231490850448608, "rewards/margins": 2.0071983337402344, "rewards/rejected": -2.015429735183716, "step": 1331 }, { "epoch": 0.15, "learning_rate": 2.578251199812712e-07, "logits/chosen": -3.0211377143859863, "logits/rejected": -2.7029144763946533, "logps/chosen": -299.80572509765625, "logps/rejected": -187.5726318359375, "loss": 0.6022, "rewards/accuracies": 0.625, "rewards/chosen": -0.20919884741306305, "rewards/margins": 0.7625776529312134, "rewards/rejected": -0.9717764854431152, "step": 1332 }, { "epoch": 0.15, "learning_rate": 2.5779000351164696e-07, "logits/chosen": -2.910921573638916, "logits/rejected": -2.8228819370269775, "logps/chosen": -320.490234375, "logps/rejected": -363.3335266113281, "loss": 0.4639, "rewards/accuracies": 0.625, "rewards/chosen": -0.23510269820690155, "rewards/margins": 1.20047128200531, "rewards/rejected": -1.435573935508728, "step": 1333 }, { "epoch": 0.15, "learning_rate": 2.577548870420227e-07, "logits/chosen": -1.9972152709960938, "logits/rejected": -1.8992042541503906, "logps/chosen": -279.6734924316406, "logps/rejected": -266.1451721191406, "loss": 0.6477, "rewards/accuracies": 0.5, "rewards/chosen": -0.17433536052703857, "rewards/margins": 0.2880229949951172, "rewards/rejected": -0.46235838532447815, "step": 1334 }, { "epoch": 0.15, "learning_rate": 2.577197705723984e-07, "logits/chosen": -2.8397796154022217, "logits/rejected": -2.9166674613952637, "logps/chosen": -172.4813690185547, "logps/rejected": -178.2071533203125, "loss": 0.5685, "rewards/accuracies": 0.375, "rewards/chosen": -0.49427711963653564, "rewards/margins": 0.9155290126800537, "rewards/rejected": -1.4098061323165894, "step": 1335 }, { "epoch": 0.15, "learning_rate": 2.5768465410277417e-07, "logits/chosen": -3.04011607170105, "logits/rejected": -3.047320604324341, "logps/chosen": -141.34194946289062, "logps/rejected": -185.8659210205078, "loss": 0.3501, "rewards/accuracies": 1.0, "rewards/chosen": -0.23633472621440887, "rewards/margins": 1.0943012237548828, "rewards/rejected": -1.330635905265808, "step": 1336 }, { "epoch": 0.15, "learning_rate": 2.576495376331499e-07, "logits/chosen": -2.802640914916992, "logits/rejected": -2.7107882499694824, "logps/chosen": -378.320068359375, "logps/rejected": -393.7737731933594, "loss": 0.3839, "rewards/accuracies": 0.875, "rewards/chosen": -0.1115812212228775, "rewards/margins": 1.0793211460113525, "rewards/rejected": -1.1909023523330688, "step": 1337 }, { "epoch": 0.15, "learning_rate": 2.576144211635257e-07, "logits/chosen": -3.5441951751708984, "logits/rejected": -3.316993474960327, "logps/chosen": -253.17745971679688, "logps/rejected": -195.74221801757812, "loss": 0.4647, "rewards/accuracies": 0.625, "rewards/chosen": 0.14122381806373596, "rewards/margins": 0.9724863171577454, "rewards/rejected": -0.831262469291687, "step": 1338 }, { "epoch": 0.15, "learning_rate": 2.5757930469390143e-07, "logits/chosen": -3.1588149070739746, "logits/rejected": -3.020139694213867, "logps/chosen": -300.21502685546875, "logps/rejected": -357.209228515625, "loss": 0.7251, "rewards/accuracies": 0.625, "rewards/chosen": -0.279959499835968, "rewards/margins": 0.26815205812454224, "rewards/rejected": -0.5481115579605103, "step": 1339 }, { "epoch": 0.15, "learning_rate": 2.575441882242772e-07, "logits/chosen": -2.766725778579712, "logits/rejected": -2.8605597019195557, "logps/chosen": -348.01904296875, "logps/rejected": -205.75148010253906, "loss": 0.6724, "rewards/accuracies": 0.875, "rewards/chosen": -0.19295647740364075, "rewards/margins": 0.4833073318004608, "rewards/rejected": -0.6762638092041016, "step": 1340 }, { "epoch": 0.15, "learning_rate": 2.5750907175465294e-07, "logits/chosen": -2.8689775466918945, "logits/rejected": -2.532484292984009, "logps/chosen": -243.0447540283203, "logps/rejected": -359.3069763183594, "loss": 0.2083, "rewards/accuracies": 1.0, "rewards/chosen": 0.5818072557449341, "rewards/margins": 2.804609775543213, "rewards/rejected": -2.2228024005889893, "step": 1341 }, { "epoch": 0.15, "learning_rate": 2.574739552850287e-07, "logits/chosen": -3.046572208404541, "logits/rejected": -3.5317859649658203, "logps/chosen": -173.205078125, "logps/rejected": -150.58621215820312, "loss": 0.4521, "rewards/accuracies": 0.875, "rewards/chosen": -0.17993669211864471, "rewards/margins": 0.9181038737297058, "rewards/rejected": -1.0980406999588013, "step": 1342 }, { "epoch": 0.15, "learning_rate": 2.574388388154044e-07, "logits/chosen": -2.9279863834381104, "logits/rejected": -2.8664026260375977, "logps/chosen": -386.1767883300781, "logps/rejected": -312.8895263671875, "loss": 0.6385, "rewards/accuracies": 0.625, "rewards/chosen": -0.5790375471115112, "rewards/margins": 0.6932671070098877, "rewards/rejected": -1.2723045349121094, "step": 1343 }, { "epoch": 0.15, "learning_rate": 2.5740372234578015e-07, "logits/chosen": -3.529600143432617, "logits/rejected": -3.6541333198547363, "logps/chosen": -190.43984985351562, "logps/rejected": -189.32638549804688, "loss": 0.3922, "rewards/accuracies": 0.875, "rewards/chosen": 0.2642107307910919, "rewards/margins": 1.0616402626037598, "rewards/rejected": -0.7974294424057007, "step": 1344 }, { "epoch": 0.16, "learning_rate": 2.573686058761559e-07, "logits/chosen": -3.0529565811157227, "logits/rejected": -3.256930351257324, "logps/chosen": -415.5158996582031, "logps/rejected": -347.5145263671875, "loss": 0.3115, "rewards/accuracies": 0.75, "rewards/chosen": 0.01732298731803894, "rewards/margins": 2.555565357208252, "rewards/rejected": -2.5382423400878906, "step": 1345 }, { "epoch": 0.16, "learning_rate": 2.5733348940653166e-07, "logits/chosen": -3.524427890777588, "logits/rejected": -3.2087621688842773, "logps/chosen": -521.156494140625, "logps/rejected": -338.38818359375, "loss": 0.2586, "rewards/accuracies": 1.0, "rewards/chosen": 0.24943920969963074, "rewards/margins": 2.0311365127563477, "rewards/rejected": -1.781697392463684, "step": 1346 }, { "epoch": 0.16, "learning_rate": 2.572983729369074e-07, "logits/chosen": -2.212906837463379, "logits/rejected": -2.129861831665039, "logps/chosen": -281.44464111328125, "logps/rejected": -254.72792053222656, "loss": 0.683, "rewards/accuracies": 0.5, "rewards/chosen": 0.02011007070541382, "rewards/margins": 0.32506948709487915, "rewards/rejected": -0.30495941638946533, "step": 1347 }, { "epoch": 0.16, "learning_rate": 2.572632564672831e-07, "logits/chosen": -3.854924201965332, "logits/rejected": -3.8617465496063232, "logps/chosen": -362.65655517578125, "logps/rejected": -331.388671875, "loss": 0.4386, "rewards/accuracies": 0.75, "rewards/chosen": -0.3759136199951172, "rewards/margins": 1.5254466533660889, "rewards/rejected": -1.901360273361206, "step": 1348 }, { "epoch": 0.16, "learning_rate": 2.5722813999765886e-07, "logits/chosen": -2.695035457611084, "logits/rejected": -2.9419705867767334, "logps/chosen": -213.92608642578125, "logps/rejected": -175.310546875, "loss": 0.8734, "rewards/accuracies": 0.375, "rewards/chosen": -0.2719343304634094, "rewards/margins": -0.2346378117799759, "rewards/rejected": -0.037296511232852936, "step": 1349 }, { "epoch": 0.16, "learning_rate": 2.5719302352803467e-07, "logits/chosen": -2.6823413372039795, "logits/rejected": -2.6569836139678955, "logps/chosen": -324.4682922363281, "logps/rejected": -293.76092529296875, "loss": 0.5095, "rewards/accuracies": 0.875, "rewards/chosen": 0.12054458260536194, "rewards/margins": 0.8754315376281738, "rewards/rejected": -0.7548869848251343, "step": 1350 }, { "epoch": 0.16, "learning_rate": 2.5715790705841037e-07, "logits/chosen": -3.385639190673828, "logits/rejected": -3.45224666595459, "logps/chosen": -265.8811340332031, "logps/rejected": -216.66966247558594, "loss": 0.3896, "rewards/accuracies": 0.875, "rewards/chosen": 0.006525054574012756, "rewards/margins": 1.8008615970611572, "rewards/rejected": -1.7943366765975952, "step": 1351 }, { "epoch": 0.16, "learning_rate": 2.571227905887861e-07, "logits/chosen": -3.4179399013519287, "logits/rejected": -3.3528499603271484, "logps/chosen": -196.7360076904297, "logps/rejected": -205.28848266601562, "loss": 0.4716, "rewards/accuracies": 0.75, "rewards/chosen": -0.18109527230262756, "rewards/margins": 1.005918264389038, "rewards/rejected": -1.1870136260986328, "step": 1352 }, { "epoch": 0.16, "learning_rate": 2.570876741191619e-07, "logits/chosen": -3.1486754417419434, "logits/rejected": -3.3651914596557617, "logps/chosen": -84.2898178100586, "logps/rejected": -138.9302215576172, "loss": 0.3656, "rewards/accuracies": 0.875, "rewards/chosen": 0.0016013383865356445, "rewards/margins": 1.6735140085220337, "rewards/rejected": -1.671912670135498, "step": 1353 }, { "epoch": 0.16, "learning_rate": 2.5705255764953763e-07, "logits/chosen": -3.6131789684295654, "logits/rejected": -3.488199472427368, "logps/chosen": -144.23800659179688, "logps/rejected": -162.8486785888672, "loss": 0.4406, "rewards/accuracies": 0.875, "rewards/chosen": -0.029750511050224304, "rewards/margins": 0.875946044921875, "rewards/rejected": -0.9056965708732605, "step": 1354 }, { "epoch": 0.16, "learning_rate": 2.570174411799134e-07, "logits/chosen": -3.6672186851501465, "logits/rejected": -3.3276636600494385, "logps/chosen": -161.36087036132812, "logps/rejected": -195.42633056640625, "loss": 0.3021, "rewards/accuracies": 0.75, "rewards/chosen": -0.19010359048843384, "rewards/margins": 2.0184736251831055, "rewards/rejected": -2.2085771560668945, "step": 1355 }, { "epoch": 0.16, "learning_rate": 2.569823247102891e-07, "logits/chosen": -2.987417697906494, "logits/rejected": -3.123345375061035, "logps/chosen": -342.34649658203125, "logps/rejected": -223.80052185058594, "loss": 0.4614, "rewards/accuracies": 0.75, "rewards/chosen": -0.09144477546215057, "rewards/margins": 0.9815898537635803, "rewards/rejected": -1.073034644126892, "step": 1356 }, { "epoch": 0.16, "learning_rate": 2.5694720824066484e-07, "logits/chosen": -2.8168628215789795, "logits/rejected": -2.63747501373291, "logps/chosen": -356.0416259765625, "logps/rejected": -284.3099060058594, "loss": 0.826, "rewards/accuracies": 0.375, "rewards/chosen": -0.673096776008606, "rewards/margins": 0.2346608191728592, "rewards/rejected": -0.9077576398849487, "step": 1357 }, { "epoch": 0.16, "learning_rate": 2.569120917710406e-07, "logits/chosen": -2.88814640045166, "logits/rejected": -2.9364426136016846, "logps/chosen": -263.3108215332031, "logps/rejected": -333.1740417480469, "loss": 0.4675, "rewards/accuracies": 0.75, "rewards/chosen": -0.2529527246952057, "rewards/margins": 1.2739853858947754, "rewards/rejected": -1.5269379615783691, "step": 1358 }, { "epoch": 0.16, "learning_rate": 2.5687697530141635e-07, "logits/chosen": -3.1437525749206543, "logits/rejected": -2.7502827644348145, "logps/chosen": -289.53216552734375, "logps/rejected": -283.1186828613281, "loss": 0.3312, "rewards/accuracies": 0.75, "rewards/chosen": -0.28042498230934143, "rewards/margins": 1.9493902921676636, "rewards/rejected": -2.2298152446746826, "step": 1359 }, { "epoch": 0.16, "learning_rate": 2.568418588317921e-07, "logits/chosen": -2.395545482635498, "logits/rejected": -2.6632871627807617, "logps/chosen": -352.8138427734375, "logps/rejected": -426.9617919921875, "loss": 0.4107, "rewards/accuracies": 0.75, "rewards/chosen": 0.2137509435415268, "rewards/margins": 1.4155316352844238, "rewards/rejected": -1.2017806768417358, "step": 1360 }, { "epoch": 0.16, "learning_rate": 2.5680674236216786e-07, "logits/chosen": -2.7834105491638184, "logits/rejected": -2.9712319374084473, "logps/chosen": -308.0777282714844, "logps/rejected": -240.0155029296875, "loss": 0.3269, "rewards/accuracies": 0.875, "rewards/chosen": 0.1949208676815033, "rewards/margins": 1.8474559783935547, "rewards/rejected": -1.6525352001190186, "step": 1361 }, { "epoch": 0.16, "learning_rate": 2.567716258925436e-07, "logits/chosen": -2.6352343559265137, "logits/rejected": -2.6359567642211914, "logps/chosen": -190.03099060058594, "logps/rejected": -298.51959228515625, "loss": 0.2553, "rewards/accuracies": 1.0, "rewards/chosen": 0.2650943696498871, "rewards/margins": 2.1437110900878906, "rewards/rejected": -1.8786166906356812, "step": 1362 }, { "epoch": 0.16, "learning_rate": 2.5673650942291937e-07, "logits/chosen": -3.2436914443969727, "logits/rejected": -3.6064529418945312, "logps/chosen": -197.88385009765625, "logps/rejected": -304.5501403808594, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": -0.3506697118282318, "rewards/margins": 2.064213991165161, "rewards/rejected": -2.414883852005005, "step": 1363 }, { "epoch": 0.16, "learning_rate": 2.5670139295329507e-07, "logits/chosen": -3.2190346717834473, "logits/rejected": -3.0140655040740967, "logps/chosen": -260.16802978515625, "logps/rejected": -297.23809814453125, "loss": 0.3128, "rewards/accuracies": 1.0, "rewards/chosen": -0.005299568176269531, "rewards/margins": 1.3440093994140625, "rewards/rejected": -1.349308967590332, "step": 1364 }, { "epoch": 0.16, "learning_rate": 2.566662764836708e-07, "logits/chosen": -3.5546624660491943, "logits/rejected": -3.093675374984741, "logps/chosen": -228.9453887939453, "logps/rejected": -128.35955810546875, "loss": 0.454, "rewards/accuracies": 0.75, "rewards/chosen": 0.2293323278427124, "rewards/margins": 0.9732205271720886, "rewards/rejected": -0.743888258934021, "step": 1365 }, { "epoch": 0.16, "learning_rate": 2.566311600140466e-07, "logits/chosen": -2.571366310119629, "logits/rejected": -2.435089111328125, "logps/chosen": -289.49371337890625, "logps/rejected": -245.40585327148438, "loss": 0.6215, "rewards/accuracies": 0.75, "rewards/chosen": 0.09940657019615173, "rewards/margins": 0.5679407119750977, "rewards/rejected": -0.4685341715812683, "step": 1366 }, { "epoch": 0.16, "learning_rate": 2.5659604354442233e-07, "logits/chosen": -3.2948272228240967, "logits/rejected": -3.4599037170410156, "logps/chosen": -281.29180908203125, "logps/rejected": -277.0350646972656, "loss": 0.6666, "rewards/accuracies": 0.5, "rewards/chosen": -0.6319332122802734, "rewards/margins": 0.886909008026123, "rewards/rejected": -1.5188422203063965, "step": 1367 }, { "epoch": 0.16, "learning_rate": 2.565609270747981e-07, "logits/chosen": -3.389592170715332, "logits/rejected": -3.4308433532714844, "logps/chosen": -132.67897033691406, "logps/rejected": -189.1661376953125, "loss": 0.4584, "rewards/accuracies": 0.75, "rewards/chosen": -0.030227214097976685, "rewards/margins": 1.4981093406677246, "rewards/rejected": -1.528336524963379, "step": 1368 }, { "epoch": 0.16, "learning_rate": 2.5652581060517384e-07, "logits/chosen": -2.6943979263305664, "logits/rejected": -3.1007564067840576, "logps/chosen": -334.94122314453125, "logps/rejected": -265.3455810546875, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": 0.45796167850494385, "rewards/margins": 1.633500576019287, "rewards/rejected": -1.1755388975143433, "step": 1369 }, { "epoch": 0.16, "learning_rate": 2.5649069413554954e-07, "logits/chosen": -2.9270825386047363, "logits/rejected": -2.8515071868896484, "logps/chosen": -159.76052856445312, "logps/rejected": -262.5685729980469, "loss": 0.5884, "rewards/accuracies": 0.5, "rewards/chosen": -0.23473826050758362, "rewards/margins": 1.6722174882888794, "rewards/rejected": -1.906955599784851, "step": 1370 }, { "epoch": 0.16, "learning_rate": 2.564555776659253e-07, "logits/chosen": -2.6052656173706055, "logits/rejected": -2.7678017616271973, "logps/chosen": -104.9153060913086, "logps/rejected": -196.5333709716797, "loss": 0.5037, "rewards/accuracies": 0.75, "rewards/chosen": -0.01072102040052414, "rewards/margins": 0.6722900867462158, "rewards/rejected": -0.6830111145973206, "step": 1371 }, { "epoch": 0.16, "learning_rate": 2.5642046119630104e-07, "logits/chosen": -2.592020273208618, "logits/rejected": -2.6717891693115234, "logps/chosen": -463.33685302734375, "logps/rejected": -404.526611328125, "loss": 0.3905, "rewards/accuracies": 0.75, "rewards/chosen": 0.6120036244392395, "rewards/margins": 1.602557897567749, "rewards/rejected": -0.9905542135238647, "step": 1372 }, { "epoch": 0.16, "learning_rate": 2.563853447266768e-07, "logits/chosen": -3.421031951904297, "logits/rejected": -3.3631749153137207, "logps/chosen": -300.27581787109375, "logps/rejected": -216.16343688964844, "loss": 0.2445, "rewards/accuracies": 0.875, "rewards/chosen": -0.009352780878543854, "rewards/margins": 1.9669498205184937, "rewards/rejected": -1.9763026237487793, "step": 1373 }, { "epoch": 0.16, "learning_rate": 2.5635022825705255e-07, "logits/chosen": -3.7147321701049805, "logits/rejected": -3.2772140502929688, "logps/chosen": -410.2956237792969, "logps/rejected": -355.19927978515625, "loss": 0.457, "rewards/accuracies": 0.75, "rewards/chosen": 0.031219899654388428, "rewards/margins": 1.1931451559066772, "rewards/rejected": -1.1619253158569336, "step": 1374 }, { "epoch": 0.16, "learning_rate": 2.563151117874283e-07, "logits/chosen": -3.7261199951171875, "logits/rejected": -3.2402570247650146, "logps/chosen": -402.9207763671875, "logps/rejected": -289.322509765625, "loss": 0.3243, "rewards/accuracies": 0.875, "rewards/chosen": 0.16271284222602844, "rewards/margins": 1.8227170705795288, "rewards/rejected": -1.6600042581558228, "step": 1375 }, { "epoch": 0.16, "learning_rate": 2.5627999531780406e-07, "logits/chosen": -2.8411757946014404, "logits/rejected": -2.626554012298584, "logps/chosen": -246.9441680908203, "logps/rejected": -326.43756103515625, "loss": 0.4107, "rewards/accuracies": 0.875, "rewards/chosen": 0.03230428695678711, "rewards/margins": 1.521988868713379, "rewards/rejected": -1.4896845817565918, "step": 1376 }, { "epoch": 0.16, "learning_rate": 2.5624487884817976e-07, "logits/chosen": -2.673450469970703, "logits/rejected": -2.6322360038757324, "logps/chosen": -346.960693359375, "logps/rejected": -301.95562744140625, "loss": 0.5785, "rewards/accuracies": 0.75, "rewards/chosen": -0.05377506464719772, "rewards/margins": 0.8062514662742615, "rewards/rejected": -0.8600265383720398, "step": 1377 }, { "epoch": 0.16, "learning_rate": 2.562097623785555e-07, "logits/chosen": -3.582223653793335, "logits/rejected": -3.5398154258728027, "logps/chosen": -180.41880798339844, "logps/rejected": -205.47853088378906, "loss": 0.3274, "rewards/accuracies": 0.875, "rewards/chosen": -0.022180452942848206, "rewards/margins": 1.2614927291870117, "rewards/rejected": -1.2836731672286987, "step": 1378 }, { "epoch": 0.16, "learning_rate": 2.5617464590893127e-07, "logits/chosen": -2.4894216060638428, "logits/rejected": -2.6623215675354004, "logps/chosen": -287.93218994140625, "logps/rejected": -248.9035186767578, "loss": 0.4239, "rewards/accuracies": 0.875, "rewards/chosen": -0.24348139762878418, "rewards/margins": 1.2941224575042725, "rewards/rejected": -1.5376038551330566, "step": 1379 }, { "epoch": 0.16, "learning_rate": 2.56139529439307e-07, "logits/chosen": -3.425917148590088, "logits/rejected": -3.5656769275665283, "logps/chosen": -221.22520446777344, "logps/rejected": -235.19049072265625, "loss": 0.2744, "rewards/accuracies": 1.0, "rewards/chosen": -0.03283189237117767, "rewards/margins": 2.3579044342041016, "rewards/rejected": -2.3907361030578613, "step": 1380 }, { "epoch": 0.16, "learning_rate": 2.561044129696828e-07, "logits/chosen": -2.9858784675598145, "logits/rejected": -3.2091174125671387, "logps/chosen": -186.5463104248047, "logps/rejected": -304.19207763671875, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": -0.238559752702713, "rewards/margins": 2.2824764251708984, "rewards/rejected": -2.521036386489868, "step": 1381 }, { "epoch": 0.16, "learning_rate": 2.5606929650005853e-07, "logits/chosen": -3.080803394317627, "logits/rejected": -2.9514644145965576, "logps/chosen": -269.7321472167969, "logps/rejected": -209.8255157470703, "loss": 0.3882, "rewards/accuracies": 0.875, "rewards/chosen": 0.35495150089263916, "rewards/margins": 0.9708800315856934, "rewards/rejected": -0.615928590297699, "step": 1382 }, { "epoch": 0.16, "learning_rate": 2.5603418003043423e-07, "logits/chosen": -3.1469600200653076, "logits/rejected": -3.211188793182373, "logps/chosen": -190.2865447998047, "logps/rejected": -237.42807006835938, "loss": 0.2587, "rewards/accuracies": 0.875, "rewards/chosen": 0.47352489829063416, "rewards/margins": 2.0233206748962402, "rewards/rejected": -1.5497956275939941, "step": 1383 }, { "epoch": 0.16, "learning_rate": 2.5599906356081004e-07, "logits/chosen": -3.057302713394165, "logits/rejected": -2.9975523948669434, "logps/chosen": -182.8653564453125, "logps/rejected": -154.04212951660156, "loss": 0.5416, "rewards/accuracies": 0.625, "rewards/chosen": 0.11364522576332092, "rewards/margins": 0.730546772480011, "rewards/rejected": -0.6169015169143677, "step": 1384 }, { "epoch": 0.16, "learning_rate": 2.5596394709118574e-07, "logits/chosen": -3.468611717224121, "logits/rejected": -3.1657190322875977, "logps/chosen": -240.2515869140625, "logps/rejected": -177.2841033935547, "loss": 0.6151, "rewards/accuracies": 0.625, "rewards/chosen": 0.10170875489711761, "rewards/margins": 0.6754162907600403, "rewards/rejected": -0.5737075805664062, "step": 1385 }, { "epoch": 0.16, "learning_rate": 2.559288306215615e-07, "logits/chosen": -3.4515485763549805, "logits/rejected": -2.928544521331787, "logps/chosen": -249.31964111328125, "logps/rejected": -181.41744995117188, "loss": 0.7096, "rewards/accuracies": 0.625, "rewards/chosen": -0.08688148856163025, "rewards/margins": 1.3099149465560913, "rewards/rejected": -1.3967963457107544, "step": 1386 }, { "epoch": 0.16, "learning_rate": 2.5589371415193725e-07, "logits/chosen": -3.1115310192108154, "logits/rejected": -3.0429539680480957, "logps/chosen": -348.6588134765625, "logps/rejected": -289.6445617675781, "loss": 0.4574, "rewards/accuracies": 0.625, "rewards/chosen": -0.10212290287017822, "rewards/margins": 1.2949789762496948, "rewards/rejected": -1.3971019983291626, "step": 1387 }, { "epoch": 0.16, "learning_rate": 2.55858597682313e-07, "logits/chosen": -2.8216207027435303, "logits/rejected": -2.57527756690979, "logps/chosen": -409.5839538574219, "logps/rejected": -304.3885498046875, "loss": 0.3275, "rewards/accuracies": 0.75, "rewards/chosen": 0.19464054703712463, "rewards/margins": 2.1957032680511475, "rewards/rejected": -2.0010628700256348, "step": 1388 }, { "epoch": 0.16, "learning_rate": 2.5582348121268875e-07, "logits/chosen": -3.105970859527588, "logits/rejected": -3.1891326904296875, "logps/chosen": -144.0658416748047, "logps/rejected": -191.33673095703125, "loss": 0.3267, "rewards/accuracies": 0.875, "rewards/chosen": 0.10052652657032013, "rewards/margins": 1.432431697845459, "rewards/rejected": -1.3319051265716553, "step": 1389 }, { "epoch": 0.16, "learning_rate": 2.557883647430645e-07, "logits/chosen": -2.6900882720947266, "logits/rejected": -2.7059788703918457, "logps/chosen": -532.0977172851562, "logps/rejected": -358.5438537597656, "loss": 0.5714, "rewards/accuracies": 0.625, "rewards/chosen": -0.209239199757576, "rewards/margins": 0.9423686265945435, "rewards/rejected": -1.151607871055603, "step": 1390 }, { "epoch": 0.16, "learning_rate": 2.557532482734402e-07, "logits/chosen": -3.71512508392334, "logits/rejected": -3.428868293762207, "logps/chosen": -340.1872253417969, "logps/rejected": -208.19244384765625, "loss": 0.3845, "rewards/accuracies": 0.75, "rewards/chosen": -0.6652637720108032, "rewards/margins": 1.5700204372406006, "rewards/rejected": -2.2352843284606934, "step": 1391 }, { "epoch": 0.16, "learning_rate": 2.5571813180381596e-07, "logits/chosen": -3.448676347732544, "logits/rejected": -3.607452392578125, "logps/chosen": -230.37644958496094, "logps/rejected": -322.3721923828125, "loss": 0.4822, "rewards/accuracies": 0.875, "rewards/chosen": -0.22931000590324402, "rewards/margins": 1.4019787311553955, "rewards/rejected": -1.6312886476516724, "step": 1392 }, { "epoch": 0.16, "learning_rate": 2.556830153341917e-07, "logits/chosen": -2.775630474090576, "logits/rejected": -2.743398666381836, "logps/chosen": -304.58099365234375, "logps/rejected": -229.9409637451172, "loss": 0.322, "rewards/accuracies": 0.875, "rewards/chosen": -0.3270447552204132, "rewards/margins": 1.822581171989441, "rewards/rejected": -2.149625778198242, "step": 1393 }, { "epoch": 0.16, "learning_rate": 2.5564789886456747e-07, "logits/chosen": -2.7719762325286865, "logits/rejected": -2.8218207359313965, "logps/chosen": -264.3139343261719, "logps/rejected": -243.90940856933594, "loss": 0.3573, "rewards/accuracies": 0.875, "rewards/chosen": -0.3033883273601532, "rewards/margins": 1.013213038444519, "rewards/rejected": -1.316601276397705, "step": 1394 }, { "epoch": 0.16, "learning_rate": 2.556127823949432e-07, "logits/chosen": -2.7200276851654053, "logits/rejected": -2.755641460418701, "logps/chosen": -317.62371826171875, "logps/rejected": -245.68638610839844, "loss": 0.4419, "rewards/accuracies": 0.875, "rewards/chosen": 0.21496382355690002, "rewards/margins": 1.0160181522369385, "rewards/rejected": -0.8010542392730713, "step": 1395 }, { "epoch": 0.16, "learning_rate": 2.55577665925319e-07, "logits/chosen": -3.2336153984069824, "logits/rejected": -3.109405517578125, "logps/chosen": -284.75213623046875, "logps/rejected": -149.0267333984375, "loss": 0.5451, "rewards/accuracies": 0.625, "rewards/chosen": -0.14286932349205017, "rewards/margins": 0.6663054823875427, "rewards/rejected": -0.8091747164726257, "step": 1396 }, { "epoch": 0.16, "learning_rate": 2.5554254945569473e-07, "logits/chosen": -3.294424295425415, "logits/rejected": -3.430720329284668, "logps/chosen": -334.2586669921875, "logps/rejected": -166.92367553710938, "loss": 0.7215, "rewards/accuracies": 0.5, "rewards/chosen": -0.22337263822555542, "rewards/margins": 1.0314642190933228, "rewards/rejected": -1.254836916923523, "step": 1397 }, { "epoch": 0.16, "learning_rate": 2.555074329860705e-07, "logits/chosen": -3.60394024848938, "logits/rejected": -3.6011621952056885, "logps/chosen": -105.44680786132812, "logps/rejected": -185.07354736328125, "loss": 0.2512, "rewards/accuracies": 1.0, "rewards/chosen": -0.06786474585533142, "rewards/margins": 1.970583200454712, "rewards/rejected": -2.038447856903076, "step": 1398 }, { "epoch": 0.16, "learning_rate": 2.554723165164462e-07, "logits/chosen": -2.903747081756592, "logits/rejected": -3.1693594455718994, "logps/chosen": -297.9914855957031, "logps/rejected": -372.04876708984375, "loss": 0.4474, "rewards/accuracies": 0.625, "rewards/chosen": -0.012513790279626846, "rewards/margins": 1.2624541521072388, "rewards/rejected": -1.274968147277832, "step": 1399 }, { "epoch": 0.16, "learning_rate": 2.5543720004682194e-07, "logits/chosen": -2.9480085372924805, "logits/rejected": -2.8699285984039307, "logps/chosen": -218.75706481933594, "logps/rejected": -193.74713134765625, "loss": 0.2141, "rewards/accuracies": 1.0, "rewards/chosen": 0.09799454361200333, "rewards/margins": 2.174708366394043, "rewards/rejected": -2.076713800430298, "step": 1400 }, { "epoch": 0.16, "learning_rate": 2.554020835771977e-07, "logits/chosen": -3.033289909362793, "logits/rejected": -3.278651714324951, "logps/chosen": -326.92120361328125, "logps/rejected": -324.2908020019531, "loss": 0.5185, "rewards/accuracies": 0.75, "rewards/chosen": 0.1905927062034607, "rewards/margins": 0.9285632371902466, "rewards/rejected": -0.7379705309867859, "step": 1401 }, { "epoch": 0.16, "learning_rate": 2.5536696710757345e-07, "logits/chosen": -2.9420859813690186, "logits/rejected": -3.3301327228546143, "logps/chosen": -331.79266357421875, "logps/rejected": -210.67750549316406, "loss": 0.2849, "rewards/accuracies": 1.0, "rewards/chosen": 0.07497105002403259, "rewards/margins": 1.332480549812317, "rewards/rejected": -1.257509469985962, "step": 1402 }, { "epoch": 0.16, "learning_rate": 2.553318506379492e-07, "logits/chosen": -2.7265026569366455, "logits/rejected": -2.6109001636505127, "logps/chosen": -210.85317993164062, "logps/rejected": -267.5634765625, "loss": 0.5189, "rewards/accuracies": 0.875, "rewards/chosen": 0.012741759419441223, "rewards/margins": 0.6571580767631531, "rewards/rejected": -0.644416332244873, "step": 1403 }, { "epoch": 0.16, "learning_rate": 2.552967341683249e-07, "logits/chosen": -2.76473331451416, "logits/rejected": -2.766204357147217, "logps/chosen": -160.80166625976562, "logps/rejected": -169.257568359375, "loss": 0.3156, "rewards/accuracies": 0.75, "rewards/chosen": 0.5081496834754944, "rewards/margins": 1.5454492568969727, "rewards/rejected": -1.0372995138168335, "step": 1404 }, { "epoch": 0.16, "learning_rate": 2.5526161769870066e-07, "logits/chosen": -3.2509255409240723, "logits/rejected": -3.56001615524292, "logps/chosen": -299.73321533203125, "logps/rejected": -215.5147705078125, "loss": 0.2385, "rewards/accuracies": 1.0, "rewards/chosen": 0.25787806510925293, "rewards/margins": 2.0118966102600098, "rewards/rejected": -1.7540185451507568, "step": 1405 }, { "epoch": 0.16, "learning_rate": 2.5522650122907646e-07, "logits/chosen": -3.0658836364746094, "logits/rejected": -2.988920211791992, "logps/chosen": -254.1191864013672, "logps/rejected": -330.7019348144531, "loss": 0.4076, "rewards/accuracies": 0.875, "rewards/chosen": -0.567616879940033, "rewards/margins": 1.8362107276916504, "rewards/rejected": -2.403827428817749, "step": 1406 }, { "epoch": 0.16, "learning_rate": 2.5519138475945216e-07, "logits/chosen": -3.9214041233062744, "logits/rejected": -3.9448089599609375, "logps/chosen": -190.67385864257812, "logps/rejected": -207.74990844726562, "loss": 0.2945, "rewards/accuracies": 1.0, "rewards/chosen": 0.37404966354370117, "rewards/margins": 1.955817699432373, "rewards/rejected": -1.5817679166793823, "step": 1407 }, { "epoch": 0.16, "learning_rate": 2.551562682898279e-07, "logits/chosen": -3.058922290802002, "logits/rejected": -2.9888410568237305, "logps/chosen": -302.7970275878906, "logps/rejected": -224.65713500976562, "loss": 0.4919, "rewards/accuracies": 0.75, "rewards/chosen": -0.08770906925201416, "rewards/margins": 0.7677137851715088, "rewards/rejected": -0.8554227948188782, "step": 1408 }, { "epoch": 0.16, "learning_rate": 2.5512115182020367e-07, "logits/chosen": -3.2733347415924072, "logits/rejected": -3.214801788330078, "logps/chosen": -383.8865966796875, "logps/rejected": -323.6093444824219, "loss": 0.3678, "rewards/accuracies": 0.75, "rewards/chosen": 0.04087008535861969, "rewards/margins": 2.200723171234131, "rewards/rejected": -2.159853219985962, "step": 1409 }, { "epoch": 0.16, "learning_rate": 2.550860353505794e-07, "logits/chosen": -2.900692939758301, "logits/rejected": -2.9764909744262695, "logps/chosen": -353.63385009765625, "logps/rejected": -258.88299560546875, "loss": 0.4229, "rewards/accuracies": 0.875, "rewards/chosen": 0.0929201990365982, "rewards/margins": 1.258988857269287, "rewards/rejected": -1.1660685539245605, "step": 1410 }, { "epoch": 0.16, "learning_rate": 2.550509188809552e-07, "logits/chosen": -2.7306056022644043, "logits/rejected": -2.653247833251953, "logps/chosen": -282.013427734375, "logps/rejected": -299.36767578125, "loss": 0.696, "rewards/accuracies": 0.625, "rewards/chosen": -0.5762251615524292, "rewards/margins": 0.4028221070766449, "rewards/rejected": -0.9790472388267517, "step": 1411 }, { "epoch": 0.16, "learning_rate": 2.550158024113309e-07, "logits/chosen": -3.175196647644043, "logits/rejected": -3.1390161514282227, "logps/chosen": -208.0045166015625, "logps/rejected": -255.71461486816406, "loss": 0.401, "rewards/accuracies": 0.875, "rewards/chosen": -0.2842245101928711, "rewards/margins": 1.2009432315826416, "rewards/rejected": -1.4851677417755127, "step": 1412 }, { "epoch": 0.16, "learning_rate": 2.5498068594170663e-07, "logits/chosen": -3.1854653358459473, "logits/rejected": -3.4655566215515137, "logps/chosen": -262.5896301269531, "logps/rejected": -235.31613159179688, "loss": 0.3636, "rewards/accuracies": 0.875, "rewards/chosen": 0.053534265607595444, "rewards/margins": 1.620290756225586, "rewards/rejected": -1.5667563676834106, "step": 1413 }, { "epoch": 0.16, "learning_rate": 2.549455694720824e-07, "logits/chosen": -2.4620165824890137, "logits/rejected": -2.2541635036468506, "logps/chosen": -262.27032470703125, "logps/rejected": -199.74594116210938, "loss": 0.5566, "rewards/accuracies": 0.625, "rewards/chosen": -0.217573881149292, "rewards/margins": 0.7734484672546387, "rewards/rejected": -0.9910223484039307, "step": 1414 }, { "epoch": 0.16, "learning_rate": 2.5491045300245814e-07, "logits/chosen": -2.7298824787139893, "logits/rejected": -2.798719644546509, "logps/chosen": -363.1792907714844, "logps/rejected": -425.41326904296875, "loss": 0.6555, "rewards/accuracies": 0.625, "rewards/chosen": -0.29643192887306213, "rewards/margins": 0.6651434898376465, "rewards/rejected": -0.9615753889083862, "step": 1415 }, { "epoch": 0.16, "learning_rate": 2.548753365328339e-07, "logits/chosen": -3.4093003273010254, "logits/rejected": -3.226437568664551, "logps/chosen": -284.97686767578125, "logps/rejected": -256.1290283203125, "loss": 0.4435, "rewards/accuracies": 0.625, "rewards/chosen": 0.25546640157699585, "rewards/margins": 1.2007815837860107, "rewards/rejected": -0.9453150629997253, "step": 1416 }, { "epoch": 0.16, "learning_rate": 2.548402200632096e-07, "logits/chosen": -3.138772487640381, "logits/rejected": -3.6539604663848877, "logps/chosen": -140.26368713378906, "logps/rejected": -221.04527282714844, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": 0.35735249519348145, "rewards/margins": 2.7113747596740723, "rewards/rejected": -2.354022264480591, "step": 1417 }, { "epoch": 0.16, "learning_rate": 2.548051035935854e-07, "logits/chosen": -2.9735805988311768, "logits/rejected": -3.2216403484344482, "logps/chosen": -257.8381652832031, "logps/rejected": -175.92898559570312, "loss": 0.3158, "rewards/accuracies": 0.875, "rewards/chosen": 0.1546740084886551, "rewards/margins": 1.818570613861084, "rewards/rejected": -1.6638966798782349, "step": 1418 }, { "epoch": 0.16, "learning_rate": 2.5476998712396116e-07, "logits/chosen": -2.5199999809265137, "logits/rejected": -2.3516764640808105, "logps/chosen": -257.1839294433594, "logps/rejected": -159.08517456054688, "loss": 0.7009, "rewards/accuracies": 0.5, "rewards/chosen": -0.1417919099330902, "rewards/margins": 0.11230640113353729, "rewards/rejected": -0.2540982961654663, "step": 1419 }, { "epoch": 0.16, "learning_rate": 2.5473487065433686e-07, "logits/chosen": -3.119842767715454, "logits/rejected": -2.8308792114257812, "logps/chosen": -281.4576416015625, "logps/rejected": -308.8079833984375, "loss": 0.3824, "rewards/accuracies": 0.625, "rewards/chosen": 0.20565614104270935, "rewards/margins": 1.4788422584533691, "rewards/rejected": -1.2731860876083374, "step": 1420 }, { "epoch": 0.16, "learning_rate": 2.546997541847126e-07, "logits/chosen": -2.6137266159057617, "logits/rejected": -2.513880968093872, "logps/chosen": -244.74778747558594, "logps/rejected": -360.1038818359375, "loss": 0.8429, "rewards/accuracies": 0.375, "rewards/chosen": -0.1371239274740219, "rewards/margins": 0.37114816904067993, "rewards/rejected": -0.5082720518112183, "step": 1421 }, { "epoch": 0.16, "learning_rate": 2.5466463771508837e-07, "logits/chosen": -3.0677576065063477, "logits/rejected": -3.2199602127075195, "logps/chosen": -193.3337860107422, "logps/rejected": -176.3008270263672, "loss": 0.5845, "rewards/accuracies": 0.5, "rewards/chosen": -0.20253866910934448, "rewards/margins": 1.1353598833084106, "rewards/rejected": -1.3378983736038208, "step": 1422 }, { "epoch": 0.16, "learning_rate": 2.546295212454641e-07, "logits/chosen": -2.728231906890869, "logits/rejected": -2.893001079559326, "logps/chosen": -326.0724792480469, "logps/rejected": -267.9109802246094, "loss": 0.606, "rewards/accuracies": 0.625, "rewards/chosen": 0.14530105888843536, "rewards/margins": 0.9579471349716187, "rewards/rejected": -0.8126461505889893, "step": 1423 }, { "epoch": 0.16, "learning_rate": 2.545944047758399e-07, "logits/chosen": -3.5356459617614746, "logits/rejected": -3.059080123901367, "logps/chosen": -323.202392578125, "logps/rejected": -152.30972290039062, "loss": 0.6849, "rewards/accuracies": 0.375, "rewards/chosen": -0.6670114994049072, "rewards/margins": 0.565125048160553, "rewards/rejected": -1.2321364879608154, "step": 1424 }, { "epoch": 0.16, "learning_rate": 2.545592883062156e-07, "logits/chosen": -3.024059772491455, "logits/rejected": -3.453648090362549, "logps/chosen": -111.16154479980469, "logps/rejected": -215.12680053710938, "loss": 0.3988, "rewards/accuracies": 0.875, "rewards/chosen": 0.35778146982192993, "rewards/margins": 1.6886993646621704, "rewards/rejected": -1.3309178352355957, "step": 1425 }, { "epoch": 0.16, "learning_rate": 2.5452417183659133e-07, "logits/chosen": -2.4465439319610596, "logits/rejected": -2.389099597930908, "logps/chosen": -256.2645263671875, "logps/rejected": -239.9393310546875, "loss": 0.6257, "rewards/accuracies": 0.625, "rewards/chosen": -0.6622249484062195, "rewards/margins": 0.5436898469924927, "rewards/rejected": -1.2059147357940674, "step": 1426 }, { "epoch": 0.16, "learning_rate": 2.5448905536696714e-07, "logits/chosen": -3.1473026275634766, "logits/rejected": -2.9449586868286133, "logps/chosen": -312.6834716796875, "logps/rejected": -275.94464111328125, "loss": 0.3285, "rewards/accuracies": 0.875, "rewards/chosen": 0.425985187292099, "rewards/margins": 1.3570674657821655, "rewards/rejected": -0.9310822486877441, "step": 1427 }, { "epoch": 0.16, "learning_rate": 2.5445393889734284e-07, "logits/chosen": -3.9885454177856445, "logits/rejected": -3.911512613296509, "logps/chosen": -236.85885620117188, "logps/rejected": -263.17303466796875, "loss": 0.5588, "rewards/accuracies": 0.875, "rewards/chosen": -0.16725441813468933, "rewards/margins": 0.5852547287940979, "rewards/rejected": -0.7525091767311096, "step": 1428 }, { "epoch": 0.16, "learning_rate": 2.544188224277186e-07, "logits/chosen": -2.882204055786133, "logits/rejected": -2.9953107833862305, "logps/chosen": -308.7751159667969, "logps/rejected": -226.295654296875, "loss": 0.6722, "rewards/accuracies": 0.75, "rewards/chosen": -0.14277642965316772, "rewards/margins": 0.21266648173332214, "rewards/rejected": -0.35544294118881226, "step": 1429 }, { "epoch": 0.16, "learning_rate": 2.5438370595809434e-07, "logits/chosen": -3.1365585327148438, "logits/rejected": -2.8381552696228027, "logps/chosen": -310.3899841308594, "logps/rejected": -230.5426483154297, "loss": 0.3302, "rewards/accuracies": 0.875, "rewards/chosen": 0.18804927170276642, "rewards/margins": 1.2387969493865967, "rewards/rejected": -1.0507477521896362, "step": 1430 }, { "epoch": 0.16, "learning_rate": 2.543485894884701e-07, "logits/chosen": -3.2969491481781006, "logits/rejected": -3.455859899520874, "logps/chosen": -303.99932861328125, "logps/rejected": -277.1585388183594, "loss": 0.2286, "rewards/accuracies": 0.875, "rewards/chosen": 0.09842848777770996, "rewards/margins": 1.839847207069397, "rewards/rejected": -1.7414188385009766, "step": 1431 }, { "epoch": 0.17, "learning_rate": 2.5431347301884585e-07, "logits/chosen": -2.6270406246185303, "logits/rejected": -2.8720760345458984, "logps/chosen": -243.7661895751953, "logps/rejected": -311.8367004394531, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": 0.3001318573951721, "rewards/margins": 1.8832015991210938, "rewards/rejected": -1.5830698013305664, "step": 1432 }, { "epoch": 0.17, "learning_rate": 2.5427835654922155e-07, "logits/chosen": -3.280094861984253, "logits/rejected": -3.1904406547546387, "logps/chosen": -158.10226440429688, "logps/rejected": -151.3723907470703, "loss": 0.3439, "rewards/accuracies": 1.0, "rewards/chosen": -0.05821910500526428, "rewards/margins": 1.0154948234558105, "rewards/rejected": -1.073714017868042, "step": 1433 }, { "epoch": 0.17, "learning_rate": 2.542432400795973e-07, "logits/chosen": -3.441411256790161, "logits/rejected": -2.9636435508728027, "logps/chosen": -157.55398559570312, "logps/rejected": -308.5328674316406, "loss": 0.9159, "rewards/accuracies": 0.375, "rewards/chosen": -0.7791717052459717, "rewards/margins": -0.015420570969581604, "rewards/rejected": -0.7637511491775513, "step": 1434 }, { "epoch": 0.17, "learning_rate": 2.5420812360997306e-07, "logits/chosen": -3.434446334838867, "logits/rejected": -3.107944965362549, "logps/chosen": -281.3332214355469, "logps/rejected": -186.76759338378906, "loss": 0.5194, "rewards/accuracies": 0.625, "rewards/chosen": -0.21181698143482208, "rewards/margins": 0.7621076107025146, "rewards/rejected": -0.9739246368408203, "step": 1435 }, { "epoch": 0.17, "learning_rate": 2.541730071403488e-07, "logits/chosen": -2.982088327407837, "logits/rejected": -3.2290596961975098, "logps/chosen": -326.74676513671875, "logps/rejected": -215.49935913085938, "loss": 0.8542, "rewards/accuracies": 0.625, "rewards/chosen": -0.18189701437950134, "rewards/margins": 0.786063551902771, "rewards/rejected": -0.9679606556892395, "step": 1436 }, { "epoch": 0.17, "learning_rate": 2.5413789067072457e-07, "logits/chosen": -2.580385684967041, "logits/rejected": -2.457716464996338, "logps/chosen": -265.2939147949219, "logps/rejected": -193.81846618652344, "loss": 0.9095, "rewards/accuracies": 0.625, "rewards/chosen": 0.011305384337902069, "rewards/margins": -0.21677416563034058, "rewards/rejected": 0.22807952761650085, "step": 1437 }, { "epoch": 0.17, "learning_rate": 2.5410277420110027e-07, "logits/chosen": -2.7149603366851807, "logits/rejected": -2.843262195587158, "logps/chosen": -161.65170288085938, "logps/rejected": -234.25845336914062, "loss": 0.4125, "rewards/accuracies": 0.875, "rewards/chosen": -0.15995177626609802, "rewards/margins": 1.0177949666976929, "rewards/rejected": -1.1777467727661133, "step": 1438 }, { "epoch": 0.17, "learning_rate": 2.54067657731476e-07, "logits/chosen": -3.739898920059204, "logits/rejected": -3.83449649810791, "logps/chosen": -183.050048828125, "logps/rejected": -194.6539764404297, "loss": 0.4051, "rewards/accuracies": 0.875, "rewards/chosen": 0.14488141238689423, "rewards/margins": 0.9840406179428101, "rewards/rejected": -0.8391591310501099, "step": 1439 }, { "epoch": 0.17, "learning_rate": 2.5403254126185183e-07, "logits/chosen": -3.525508403778076, "logits/rejected": -3.204705238342285, "logps/chosen": -222.77731323242188, "logps/rejected": -170.7025146484375, "loss": 0.3879, "rewards/accuracies": 0.875, "rewards/chosen": 0.08949099481105804, "rewards/margins": 1.09969961643219, "rewards/rejected": -1.0102087259292603, "step": 1440 }, { "epoch": 0.17, "learning_rate": 2.5399742479222753e-07, "logits/chosen": -3.348600387573242, "logits/rejected": -3.1928982734680176, "logps/chosen": -303.0113830566406, "logps/rejected": -239.31076049804688, "loss": 0.6129, "rewards/accuracies": 0.5, "rewards/chosen": 0.17562898993492126, "rewards/margins": 0.29520368576049805, "rewards/rejected": -0.119574636220932, "step": 1441 }, { "epoch": 0.17, "learning_rate": 2.539623083226033e-07, "logits/chosen": -3.233574867248535, "logits/rejected": -3.428771734237671, "logps/chosen": -117.05130767822266, "logps/rejected": -156.03131103515625, "loss": 0.6878, "rewards/accuracies": 0.5, "rewards/chosen": -0.07057924568653107, "rewards/margins": 0.6292253732681274, "rewards/rejected": -0.6998046636581421, "step": 1442 }, { "epoch": 0.17, "learning_rate": 2.5392719185297904e-07, "logits/chosen": -2.8919119834899902, "logits/rejected": -2.881925344467163, "logps/chosen": -226.90806579589844, "logps/rejected": -209.27633666992188, "loss": 0.7011, "rewards/accuracies": 0.375, "rewards/chosen": -0.37659284472465515, "rewards/margins": 0.14072071015834808, "rewards/rejected": -0.517313539981842, "step": 1443 }, { "epoch": 0.17, "learning_rate": 2.538920753833548e-07, "logits/chosen": -3.191077947616577, "logits/rejected": -3.1942694187164307, "logps/chosen": -264.18048095703125, "logps/rejected": -271.5841064453125, "loss": 0.3131, "rewards/accuracies": 0.75, "rewards/chosen": -0.05281936004757881, "rewards/margins": 2.2398056983947754, "rewards/rejected": -2.2926251888275146, "step": 1444 }, { "epoch": 0.17, "learning_rate": 2.5385695891373055e-07, "logits/chosen": -2.6050026416778564, "logits/rejected": -2.6897764205932617, "logps/chosen": -167.01034545898438, "logps/rejected": -239.47137451171875, "loss": 0.5228, "rewards/accuracies": 0.75, "rewards/chosen": -0.10167495906352997, "rewards/margins": 0.7236835360527039, "rewards/rejected": -0.8253585696220398, "step": 1445 }, { "epoch": 0.17, "learning_rate": 2.5382184244410625e-07, "logits/chosen": -2.561619520187378, "logits/rejected": -2.4731087684631348, "logps/chosen": -387.9241943359375, "logps/rejected": -223.4948272705078, "loss": 0.3081, "rewards/accuracies": 0.875, "rewards/chosen": 0.2910845875740051, "rewards/margins": 1.4601305723190308, "rewards/rejected": -1.1690459251403809, "step": 1446 }, { "epoch": 0.17, "learning_rate": 2.53786725974482e-07, "logits/chosen": -2.88962721824646, "logits/rejected": -2.7959156036376953, "logps/chosen": -206.5690460205078, "logps/rejected": -147.1153106689453, "loss": 0.596, "rewards/accuracies": 0.5, "rewards/chosen": -0.21089939773082733, "rewards/margins": 1.0483102798461914, "rewards/rejected": -1.2592097520828247, "step": 1447 }, { "epoch": 0.17, "learning_rate": 2.5375160950485775e-07, "logits/chosen": -3.0684139728546143, "logits/rejected": -3.082451581954956, "logps/chosen": -327.32110595703125, "logps/rejected": -494.4891357421875, "loss": 0.3459, "rewards/accuracies": 1.0, "rewards/chosen": 0.15136921405792236, "rewards/margins": 1.9616177082061768, "rewards/rejected": -1.8102483749389648, "step": 1448 }, { "epoch": 0.17, "learning_rate": 2.537164930352335e-07, "logits/chosen": -3.188560724258423, "logits/rejected": -3.1578826904296875, "logps/chosen": -261.8722229003906, "logps/rejected": -248.90887451171875, "loss": 0.37, "rewards/accuracies": 1.0, "rewards/chosen": 0.142405703663826, "rewards/margins": 1.1259419918060303, "rewards/rejected": -0.9835363030433655, "step": 1449 }, { "epoch": 0.17, "learning_rate": 2.5368137656560926e-07, "logits/chosen": -2.3176498413085938, "logits/rejected": -2.564669132232666, "logps/chosen": -324.75689697265625, "logps/rejected": -237.09190368652344, "loss": 0.3161, "rewards/accuracies": 0.75, "rewards/chosen": 0.5812521576881409, "rewards/margins": 1.5060019493103027, "rewards/rejected": -0.9247497916221619, "step": 1450 }, { "epoch": 0.17, "learning_rate": 2.53646260095985e-07, "logits/chosen": -3.1454029083251953, "logits/rejected": -2.87129282951355, "logps/chosen": -278.41424560546875, "logps/rejected": -228.3530731201172, "loss": 0.6634, "rewards/accuracies": 0.5, "rewards/chosen": -0.04729343205690384, "rewards/margins": 0.9725439548492432, "rewards/rejected": -1.0198372602462769, "step": 1451 }, { "epoch": 0.17, "learning_rate": 2.5361114362636077e-07, "logits/chosen": -3.744586229324341, "logits/rejected": -3.8065309524536133, "logps/chosen": -210.700927734375, "logps/rejected": -191.57992553710938, "loss": 0.6531, "rewards/accuracies": 0.5, "rewards/chosen": -0.3940339684486389, "rewards/margins": 1.0979719161987305, "rewards/rejected": -1.4920058250427246, "step": 1452 }, { "epoch": 0.17, "learning_rate": 2.535760271567365e-07, "logits/chosen": -3.4310383796691895, "logits/rejected": -3.348679780960083, "logps/chosen": -249.92538452148438, "logps/rejected": -609.4087524414062, "loss": 0.458, "rewards/accuracies": 0.875, "rewards/chosen": 0.0054101720452308655, "rewards/margins": 0.9296847581863403, "rewards/rejected": -0.9242745637893677, "step": 1453 }, { "epoch": 0.17, "learning_rate": 2.535409106871122e-07, "logits/chosen": -2.9084067344665527, "logits/rejected": -2.95662784576416, "logps/chosen": -182.98326110839844, "logps/rejected": -299.76788330078125, "loss": 0.3571, "rewards/accuracies": 0.875, "rewards/chosen": 0.11952178925275803, "rewards/margins": 1.6894251108169556, "rewards/rejected": -1.5699032545089722, "step": 1454 }, { "epoch": 0.17, "learning_rate": 2.53505794217488e-07, "logits/chosen": -2.547166109085083, "logits/rejected": -2.9583826065063477, "logps/chosen": -255.48046875, "logps/rejected": -160.6226806640625, "loss": 0.6756, "rewards/accuracies": 0.5, "rewards/chosen": -0.6849334836006165, "rewards/margins": 0.16748090088367462, "rewards/rejected": -0.8524143695831299, "step": 1455 }, { "epoch": 0.17, "learning_rate": 2.5347067774786373e-07, "logits/chosen": -2.5268592834472656, "logits/rejected": -2.4770936965942383, "logps/chosen": -210.41299438476562, "logps/rejected": -273.1662292480469, "loss": 0.6845, "rewards/accuracies": 0.625, "rewards/chosen": -0.4524649679660797, "rewards/margins": 0.1581394523382187, "rewards/rejected": -0.6106044054031372, "step": 1456 }, { "epoch": 0.17, "learning_rate": 2.534355612782395e-07, "logits/chosen": -2.7171380519866943, "logits/rejected": -2.9614906311035156, "logps/chosen": -387.09613037109375, "logps/rejected": -233.01150512695312, "loss": 0.3176, "rewards/accuracies": 0.875, "rewards/chosen": 0.20289286971092224, "rewards/margins": 1.8997175693511963, "rewards/rejected": -1.6968247890472412, "step": 1457 }, { "epoch": 0.17, "learning_rate": 2.5340044480861524e-07, "logits/chosen": -3.22169828414917, "logits/rejected": -3.0274715423583984, "logps/chosen": -156.74111938476562, "logps/rejected": -160.19729614257812, "loss": 0.3713, "rewards/accuracies": 0.875, "rewards/chosen": -0.21668705344200134, "rewards/margins": 1.3162301778793335, "rewards/rejected": -1.5329172611236572, "step": 1458 }, { "epoch": 0.17, "learning_rate": 2.53365328338991e-07, "logits/chosen": -3.1705307960510254, "logits/rejected": -2.92834734916687, "logps/chosen": -345.17236328125, "logps/rejected": -196.11795043945312, "loss": 0.3713, "rewards/accuracies": 1.0, "rewards/chosen": 0.1067657470703125, "rewards/margins": 0.9882016181945801, "rewards/rejected": -0.8814358711242676, "step": 1459 }, { "epoch": 0.17, "learning_rate": 2.533302118693667e-07, "logits/chosen": -2.959587812423706, "logits/rejected": -3.300440549850464, "logps/chosen": -182.3204345703125, "logps/rejected": -232.6237030029297, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": 0.5622125864028931, "rewards/margins": 1.6167045831680298, "rewards/rejected": -1.0544921159744263, "step": 1460 }, { "epoch": 0.17, "learning_rate": 2.532950953997425e-07, "logits/chosen": -2.762432098388672, "logits/rejected": -3.162893772125244, "logps/chosen": -171.29266357421875, "logps/rejected": -312.14019775390625, "loss": 0.2784, "rewards/accuracies": 0.875, "rewards/chosen": 0.14879512786865234, "rewards/margins": 2.1880366802215576, "rewards/rejected": -2.0392415523529053, "step": 1461 }, { "epoch": 0.17, "learning_rate": 2.532599789301182e-07, "logits/chosen": -3.7785749435424805, "logits/rejected": -3.501652717590332, "logps/chosen": -280.18634033203125, "logps/rejected": -282.50616455078125, "loss": 0.4051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2677035331726074, "rewards/margins": 1.1350148916244507, "rewards/rejected": -1.402718424797058, "step": 1462 }, { "epoch": 0.17, "learning_rate": 2.5322486246049396e-07, "logits/chosen": -2.3671164512634277, "logits/rejected": -2.6628472805023193, "logps/chosen": -370.0285949707031, "logps/rejected": -200.931884765625, "loss": 0.4575, "rewards/accuracies": 0.875, "rewards/chosen": 0.2552938759326935, "rewards/margins": 0.9206752777099609, "rewards/rejected": -0.6653813719749451, "step": 1463 }, { "epoch": 0.17, "learning_rate": 2.531897459908697e-07, "logits/chosen": -3.276956796646118, "logits/rejected": -3.230988025665283, "logps/chosen": -354.437744140625, "logps/rejected": -293.67919921875, "loss": 0.4872, "rewards/accuracies": 0.75, "rewards/chosen": -0.009841442108154297, "rewards/margins": 1.6163280010223389, "rewards/rejected": -1.6261694431304932, "step": 1464 }, { "epoch": 0.17, "learning_rate": 2.5315462952124546e-07, "logits/chosen": -3.958280563354492, "logits/rejected": -3.4684972763061523, "logps/chosen": -142.178955078125, "logps/rejected": -198.68429565429688, "loss": 0.6072, "rewards/accuracies": 0.375, "rewards/chosen": -0.5083469152450562, "rewards/margins": 0.3178911805152893, "rewards/rejected": -0.8262380361557007, "step": 1465 }, { "epoch": 0.17, "learning_rate": 2.531195130516212e-07, "logits/chosen": -2.6625940799713135, "logits/rejected": -2.6984243392944336, "logps/chosen": -166.5047607421875, "logps/rejected": -184.11871337890625, "loss": 0.6604, "rewards/accuracies": 0.75, "rewards/chosen": -0.3204686641693115, "rewards/margins": 1.0151715278625488, "rewards/rejected": -1.3356401920318604, "step": 1466 }, { "epoch": 0.17, "learning_rate": 2.5308439658199697e-07, "logits/chosen": -2.70451021194458, "logits/rejected": -3.227217674255371, "logps/chosen": -301.150390625, "logps/rejected": -261.32763671875, "loss": 0.3475, "rewards/accuracies": 0.875, "rewards/chosen": 0.0415346622467041, "rewards/margins": 1.9166679382324219, "rewards/rejected": -1.8751332759857178, "step": 1467 }, { "epoch": 0.17, "learning_rate": 2.5304928011237267e-07, "logits/chosen": -2.750596761703491, "logits/rejected": -2.7246763706207275, "logps/chosen": -352.0214538574219, "logps/rejected": -309.0485534667969, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -0.40644171833992004, "rewards/margins": 0.7567801475524902, "rewards/rejected": -1.163221836090088, "step": 1468 }, { "epoch": 0.17, "learning_rate": 2.5301416364274843e-07, "logits/chosen": -3.0502450466156006, "logits/rejected": -2.9883627891540527, "logps/chosen": -279.7407531738281, "logps/rejected": -242.40512084960938, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": 0.14638537168502808, "rewards/margins": 1.4199833869934082, "rewards/rejected": -1.2735979557037354, "step": 1469 }, { "epoch": 0.17, "learning_rate": 2.529790471731242e-07, "logits/chosen": -3.2775676250457764, "logits/rejected": -3.4606127738952637, "logps/chosen": -172.9986572265625, "logps/rejected": -204.53424072265625, "loss": 0.4707, "rewards/accuracies": 0.625, "rewards/chosen": -0.17027579247951508, "rewards/margins": 1.6353858709335327, "rewards/rejected": -1.805661678314209, "step": 1470 }, { "epoch": 0.17, "learning_rate": 2.5294393070349993e-07, "logits/chosen": -3.2827870845794678, "logits/rejected": -3.176438093185425, "logps/chosen": -423.6960754394531, "logps/rejected": -329.7424621582031, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": 0.3224334716796875, "rewards/margins": 2.721797227859497, "rewards/rejected": -2.3993639945983887, "step": 1471 }, { "epoch": 0.17, "learning_rate": 2.529088142338757e-07, "logits/chosen": -2.4656100273132324, "logits/rejected": -2.5939559936523438, "logps/chosen": -354.7267150878906, "logps/rejected": -391.6355895996094, "loss": 0.3407, "rewards/accuracies": 0.875, "rewards/chosen": 0.38906872272491455, "rewards/margins": 1.0885554552078247, "rewards/rejected": -0.6994867324829102, "step": 1472 }, { "epoch": 0.17, "learning_rate": 2.528736977642514e-07, "logits/chosen": -3.2631592750549316, "logits/rejected": -3.3675734996795654, "logps/chosen": -224.52090454101562, "logps/rejected": -180.0391845703125, "loss": 1.1499, "rewards/accuracies": 0.5, "rewards/chosen": -0.28892096877098083, "rewards/margins": 0.10618922114372253, "rewards/rejected": -0.39511021971702576, "step": 1473 }, { "epoch": 0.17, "learning_rate": 2.528385812946272e-07, "logits/chosen": -2.901008367538452, "logits/rejected": -3.1624083518981934, "logps/chosen": -264.330810546875, "logps/rejected": -174.38119506835938, "loss": 0.4709, "rewards/accuracies": 0.75, "rewards/chosen": -0.17445456981658936, "rewards/margins": 0.693864643573761, "rewards/rejected": -0.8683191537857056, "step": 1474 }, { "epoch": 0.17, "learning_rate": 2.528034648250029e-07, "logits/chosen": -3.1642580032348633, "logits/rejected": -3.069148540496826, "logps/chosen": -305.0733947753906, "logps/rejected": -207.73834228515625, "loss": 0.2639, "rewards/accuracies": 1.0, "rewards/chosen": 0.8478806614875793, "rewards/margins": 1.9835484027862549, "rewards/rejected": -1.1356676816940308, "step": 1475 }, { "epoch": 0.17, "learning_rate": 2.5276834835537865e-07, "logits/chosen": -3.209831714630127, "logits/rejected": -3.1313014030456543, "logps/chosen": -225.40476989746094, "logps/rejected": -267.27935791015625, "loss": 0.1668, "rewards/accuracies": 1.0, "rewards/chosen": 0.25886791944503784, "rewards/margins": 1.9014215469360352, "rewards/rejected": -1.6425535678863525, "step": 1476 }, { "epoch": 0.17, "learning_rate": 2.527332318857544e-07, "logits/chosen": -2.7364768981933594, "logits/rejected": -2.755094528198242, "logps/chosen": -369.8105163574219, "logps/rejected": -227.78607177734375, "loss": 0.358, "rewards/accuracies": 0.875, "rewards/chosen": -0.030137568712234497, "rewards/margins": 1.3893910646438599, "rewards/rejected": -1.419528603553772, "step": 1477 }, { "epoch": 0.17, "learning_rate": 2.5269811541613016e-07, "logits/chosen": -3.484485626220703, "logits/rejected": -3.40386962890625, "logps/chosen": -319.8224792480469, "logps/rejected": -183.54689025878906, "loss": 0.4416, "rewards/accuracies": 0.875, "rewards/chosen": -0.03464818000793457, "rewards/margins": 1.0196079015731812, "rewards/rejected": -1.0542562007904053, "step": 1478 }, { "epoch": 0.17, "learning_rate": 2.526629989465059e-07, "logits/chosen": -2.840170383453369, "logits/rejected": -2.5846152305603027, "logps/chosen": -478.67913818359375, "logps/rejected": -347.0202331542969, "loss": 0.5693, "rewards/accuracies": 0.625, "rewards/chosen": -0.47054025530815125, "rewards/margins": 0.8161214590072632, "rewards/rejected": -1.2866617441177368, "step": 1479 }, { "epoch": 0.17, "learning_rate": 2.5262788247688167e-07, "logits/chosen": -3.4284145832061768, "logits/rejected": -3.521925926208496, "logps/chosen": -190.2987823486328, "logps/rejected": -235.26800537109375, "loss": 0.3757, "rewards/accuracies": 0.875, "rewards/chosen": 0.24903905391693115, "rewards/margins": 1.112391471862793, "rewards/rejected": -0.8633524179458618, "step": 1480 }, { "epoch": 0.17, "learning_rate": 2.5259276600725737e-07, "logits/chosen": -2.452449083328247, "logits/rejected": -2.6403400897979736, "logps/chosen": -220.6059112548828, "logps/rejected": -292.6126403808594, "loss": 0.3999, "rewards/accuracies": 0.875, "rewards/chosen": -0.013939021155238152, "rewards/margins": 0.9825767874717712, "rewards/rejected": -0.9965158104896545, "step": 1481 }, { "epoch": 0.17, "learning_rate": 2.525576495376331e-07, "logits/chosen": -2.3533377647399902, "logits/rejected": -2.173398494720459, "logps/chosen": -285.12921142578125, "logps/rejected": -332.5311279296875, "loss": 0.6385, "rewards/accuracies": 0.5, "rewards/chosen": 0.12080040574073792, "rewards/margins": 0.3772051930427551, "rewards/rejected": -0.2564047574996948, "step": 1482 }, { "epoch": 0.17, "learning_rate": 2.525225330680089e-07, "logits/chosen": -2.3709564208984375, "logits/rejected": -2.4902408123016357, "logps/chosen": -276.57794189453125, "logps/rejected": -315.3499755859375, "loss": 0.5572, "rewards/accuracies": 0.625, "rewards/chosen": -0.33589598536491394, "rewards/margins": 0.8976808786392212, "rewards/rejected": -1.233577013015747, "step": 1483 }, { "epoch": 0.17, "learning_rate": 2.5248741659838463e-07, "logits/chosen": -2.551868438720703, "logits/rejected": -2.747617483139038, "logps/chosen": -227.4838409423828, "logps/rejected": -255.47607421875, "loss": 0.445, "rewards/accuracies": 0.75, "rewards/chosen": -0.038092032074928284, "rewards/margins": 0.9756458401679993, "rewards/rejected": -1.0137379169464111, "step": 1484 }, { "epoch": 0.17, "learning_rate": 2.524523001287604e-07, "logits/chosen": -3.8182642459869385, "logits/rejected": -3.6585323810577393, "logps/chosen": -233.0343780517578, "logps/rejected": -254.59454345703125, "loss": 0.6396, "rewards/accuracies": 0.75, "rewards/chosen": -0.42380088567733765, "rewards/margins": 0.45865052938461304, "rewards/rejected": -0.8824514150619507, "step": 1485 }, { "epoch": 0.17, "learning_rate": 2.5241718365913614e-07, "logits/chosen": -2.9236905574798584, "logits/rejected": -2.729034185409546, "logps/chosen": -253.08409118652344, "logps/rejected": -233.7904815673828, "loss": 0.5589, "rewards/accuracies": 0.625, "rewards/chosen": -0.5912806391716003, "rewards/margins": 1.1578669548034668, "rewards/rejected": -1.749147653579712, "step": 1486 }, { "epoch": 0.17, "learning_rate": 2.523820671895119e-07, "logits/chosen": -2.9666190147399902, "logits/rejected": -3.2817306518554688, "logps/chosen": -329.497314453125, "logps/rejected": -278.8468017578125, "loss": 0.5709, "rewards/accuracies": 0.75, "rewards/chosen": 0.007200382649898529, "rewards/margins": 1.053473949432373, "rewards/rejected": -1.0462735891342163, "step": 1487 }, { "epoch": 0.17, "learning_rate": 2.5234695071988764e-07, "logits/chosen": -3.2970314025878906, "logits/rejected": -3.1642990112304688, "logps/chosen": -418.9378967285156, "logps/rejected": -285.11322021484375, "loss": 0.4103, "rewards/accuracies": 0.875, "rewards/chosen": 0.033888038247823715, "rewards/margins": 2.2888152599334717, "rewards/rejected": -2.254927158355713, "step": 1488 }, { "epoch": 0.17, "learning_rate": 2.5231183425026335e-07, "logits/chosen": -2.9056015014648438, "logits/rejected": -2.822819948196411, "logps/chosen": -144.73825073242188, "logps/rejected": -194.91416931152344, "loss": 0.5054, "rewards/accuracies": 0.75, "rewards/chosen": -0.21557393670082092, "rewards/margins": 0.5858629941940308, "rewards/rejected": -0.8014369606971741, "step": 1489 }, { "epoch": 0.17, "learning_rate": 2.522767177806391e-07, "logits/chosen": -3.5405848026275635, "logits/rejected": -3.176156520843506, "logps/chosen": -420.5745849609375, "logps/rejected": -404.83551025390625, "loss": 1.2178, "rewards/accuracies": 0.5, "rewards/chosen": -0.17814184725284576, "rewards/margins": 0.3955309987068176, "rewards/rejected": -0.5736728310585022, "step": 1490 }, { "epoch": 0.17, "learning_rate": 2.5224160131101485e-07, "logits/chosen": -2.5740370750427246, "logits/rejected": -2.3131885528564453, "logps/chosen": -362.47308349609375, "logps/rejected": -329.52801513671875, "loss": 0.5806, "rewards/accuracies": 0.75, "rewards/chosen": -0.0076027363538742065, "rewards/margins": 0.439958781003952, "rewards/rejected": -0.4475615620613098, "step": 1491 }, { "epoch": 0.17, "learning_rate": 2.522064848413906e-07, "logits/chosen": -3.3274459838867188, "logits/rejected": -3.0136053562164307, "logps/chosen": -185.93385314941406, "logps/rejected": -217.38868713378906, "loss": 0.292, "rewards/accuracies": 1.0, "rewards/chosen": 0.2784789800643921, "rewards/margins": 1.7974997758865356, "rewards/rejected": -1.519020915031433, "step": 1492 }, { "epoch": 0.17, "learning_rate": 2.5217136837176636e-07, "logits/chosen": -3.760270357131958, "logits/rejected": -3.2762935161590576, "logps/chosen": -251.96646118164062, "logps/rejected": -295.4446716308594, "loss": 0.4716, "rewards/accuracies": 0.75, "rewards/chosen": -0.04217401146888733, "rewards/margins": 1.0856494903564453, "rewards/rejected": -1.1278234720230103, "step": 1493 }, { "epoch": 0.17, "learning_rate": 2.5213625190214206e-07, "logits/chosen": -3.0328121185302734, "logits/rejected": -2.936918258666992, "logps/chosen": -201.1170196533203, "logps/rejected": -168.11474609375, "loss": 0.8659, "rewards/accuracies": 0.5, "rewards/chosen": -0.6281031966209412, "rewards/margins": 0.08696767687797546, "rewards/rejected": -0.7150708436965942, "step": 1494 }, { "epoch": 0.17, "learning_rate": 2.5210113543251787e-07, "logits/chosen": -3.338139533996582, "logits/rejected": -3.2716612815856934, "logps/chosen": -144.10745239257812, "logps/rejected": -215.35394287109375, "loss": 0.3836, "rewards/accuracies": 0.75, "rewards/chosen": 0.05990919470787048, "rewards/margins": 1.7733186483383179, "rewards/rejected": -1.713409423828125, "step": 1495 }, { "epoch": 0.17, "learning_rate": 2.520660189628936e-07, "logits/chosen": -3.7239742279052734, "logits/rejected": -3.289851427078247, "logps/chosen": -302.9237060546875, "logps/rejected": -237.79861450195312, "loss": 0.2464, "rewards/accuracies": 0.875, "rewards/chosen": 0.38266369700431824, "rewards/margins": 1.9189233779907227, "rewards/rejected": -1.5362597703933716, "step": 1496 }, { "epoch": 0.17, "learning_rate": 2.520309024932693e-07, "logits/chosen": -3.18709135055542, "logits/rejected": -3.2826099395751953, "logps/chosen": -145.81912231445312, "logps/rejected": -116.89448547363281, "loss": 0.5348, "rewards/accuracies": 0.5, "rewards/chosen": -0.004350811243057251, "rewards/margins": 0.8543806672096252, "rewards/rejected": -0.8587315082550049, "step": 1497 }, { "epoch": 0.17, "learning_rate": 2.519957860236451e-07, "logits/chosen": -3.356926679611206, "logits/rejected": -3.1862244606018066, "logps/chosen": -416.5458068847656, "logps/rejected": -238.13705444335938, "loss": 0.3507, "rewards/accuracies": 0.875, "rewards/chosen": -0.08060942590236664, "rewards/margins": 1.4154481887817383, "rewards/rejected": -1.4960577487945557, "step": 1498 }, { "epoch": 0.17, "learning_rate": 2.5196066955402083e-07, "logits/chosen": -3.050638198852539, "logits/rejected": -3.012503147125244, "logps/chosen": -145.2467041015625, "logps/rejected": -184.32815551757812, "loss": 0.7082, "rewards/accuracies": 0.375, "rewards/chosen": -0.6995250582695007, "rewards/margins": 0.56214439868927, "rewards/rejected": -1.2616695165634155, "step": 1499 }, { "epoch": 0.17, "learning_rate": 2.519255530843966e-07, "logits/chosen": -3.6339104175567627, "logits/rejected": -3.552980422973633, "logps/chosen": -240.80221557617188, "logps/rejected": -224.62625122070312, "loss": 0.3468, "rewards/accuracies": 0.875, "rewards/chosen": -0.19340506196022034, "rewards/margins": 1.5050827264785767, "rewards/rejected": -1.6984878778457642, "step": 1500 }, { "epoch": 0.17, "learning_rate": 2.5189043661477234e-07, "logits/chosen": -3.8133316040039062, "logits/rejected": -3.6130716800689697, "logps/chosen": -183.78750610351562, "logps/rejected": -259.74945068359375, "loss": 0.3945, "rewards/accuracies": 0.75, "rewards/chosen": -0.07362888008356094, "rewards/margins": 2.4486570358276367, "rewards/rejected": -2.5222856998443604, "step": 1501 }, { "epoch": 0.17, "learning_rate": 2.5185532014514804e-07, "logits/chosen": -3.3617734909057617, "logits/rejected": -3.133798837661743, "logps/chosen": -216.087646484375, "logps/rejected": -271.9454345703125, "loss": 0.4731, "rewards/accuracies": 0.625, "rewards/chosen": -0.1340225785970688, "rewards/margins": 1.750115156173706, "rewards/rejected": -1.8841376304626465, "step": 1502 }, { "epoch": 0.17, "learning_rate": 2.518202036755238e-07, "logits/chosen": -3.1169919967651367, "logits/rejected": -2.9825000762939453, "logps/chosen": -283.0966796875, "logps/rejected": -231.33950805664062, "loss": 0.418, "rewards/accuracies": 0.875, "rewards/chosen": -0.24275217950344086, "rewards/margins": 1.3899757862091064, "rewards/rejected": -1.632727861404419, "step": 1503 }, { "epoch": 0.17, "learning_rate": 2.5178508720589955e-07, "logits/chosen": -2.987081527709961, "logits/rejected": -2.6397030353546143, "logps/chosen": -219.1697998046875, "logps/rejected": -131.4944610595703, "loss": 0.541, "rewards/accuracies": 0.75, "rewards/chosen": -0.1636643409729004, "rewards/margins": 0.5931396484375, "rewards/rejected": -0.7568040490150452, "step": 1504 }, { "epoch": 0.17, "learning_rate": 2.517499707362753e-07, "logits/chosen": -3.062993288040161, "logits/rejected": -2.8243839740753174, "logps/chosen": -172.07723999023438, "logps/rejected": -155.2359619140625, "loss": 0.7437, "rewards/accuracies": 0.5, "rewards/chosen": -0.5870838761329651, "rewards/margins": 0.43352046608924866, "rewards/rejected": -1.0206043720245361, "step": 1505 }, { "epoch": 0.17, "learning_rate": 2.5171485426665105e-07, "logits/chosen": -3.0968070030212402, "logits/rejected": -3.1918082237243652, "logps/chosen": -312.1986999511719, "logps/rejected": -229.95018005371094, "loss": 0.4018, "rewards/accuracies": 0.875, "rewards/chosen": 0.03913099318742752, "rewards/margins": 1.4451513290405273, "rewards/rejected": -1.4060204029083252, "step": 1506 }, { "epoch": 0.17, "learning_rate": 2.5167973779702676e-07, "logits/chosen": -3.5944979190826416, "logits/rejected": -3.679166793823242, "logps/chosen": -146.9656219482422, "logps/rejected": -180.70065307617188, "loss": 0.3643, "rewards/accuracies": 0.875, "rewards/chosen": -0.20001515746116638, "rewards/margins": 1.4685659408569336, "rewards/rejected": -1.6685811281204224, "step": 1507 }, { "epoch": 0.17, "learning_rate": 2.5164462132740256e-07, "logits/chosen": -3.5475285053253174, "logits/rejected": -3.7423617839813232, "logps/chosen": -297.0629577636719, "logps/rejected": -312.3211975097656, "loss": 0.2129, "rewards/accuracies": 1.0, "rewards/chosen": 0.06770460307598114, "rewards/margins": 2.316929340362549, "rewards/rejected": -2.2492244243621826, "step": 1508 }, { "epoch": 0.17, "learning_rate": 2.516095048577783e-07, "logits/chosen": -3.592390537261963, "logits/rejected": -3.5594072341918945, "logps/chosen": -379.09405517578125, "logps/rejected": -354.7581481933594, "loss": 0.2075, "rewards/accuracies": 1.0, "rewards/chosen": 0.30030566453933716, "rewards/margins": 1.8552958965301514, "rewards/rejected": -1.554990291595459, "step": 1509 }, { "epoch": 0.17, "learning_rate": 2.51574388388154e-07, "logits/chosen": -2.481785774230957, "logits/rejected": -2.6133971214294434, "logps/chosen": -364.5009765625, "logps/rejected": -284.3797912597656, "loss": 0.5773, "rewards/accuracies": 0.625, "rewards/chosen": -0.30536380410194397, "rewards/margins": 1.714145302772522, "rewards/rejected": -2.0195090770721436, "step": 1510 }, { "epoch": 0.17, "learning_rate": 2.5153927191852977e-07, "logits/chosen": -3.0965898036956787, "logits/rejected": -2.941537380218506, "logps/chosen": -162.57894897460938, "logps/rejected": -212.2760772705078, "loss": 0.4261, "rewards/accuracies": 0.875, "rewards/chosen": -0.0826662927865982, "rewards/margins": 1.461862564086914, "rewards/rejected": -1.544528841972351, "step": 1511 }, { "epoch": 0.17, "learning_rate": 2.515041554489055e-07, "logits/chosen": -3.766932964324951, "logits/rejected": -3.260315418243408, "logps/chosen": -161.52146911621094, "logps/rejected": -216.46792602539062, "loss": 0.394, "rewards/accuracies": 0.75, "rewards/chosen": -0.07360772788524628, "rewards/margins": 1.493163824081421, "rewards/rejected": -1.5667716264724731, "step": 1512 }, { "epoch": 0.17, "learning_rate": 2.514690389792813e-07, "logits/chosen": -3.773805618286133, "logits/rejected": -3.721752882003784, "logps/chosen": -260.4869384765625, "logps/rejected": -601.2622680664062, "loss": 0.5625, "rewards/accuracies": 0.625, "rewards/chosen": -0.17422962188720703, "rewards/margins": 1.3251670598983765, "rewards/rejected": -1.4993966817855835, "step": 1513 }, { "epoch": 0.17, "learning_rate": 2.5143392250965703e-07, "logits/chosen": -3.7214431762695312, "logits/rejected": -3.800624370574951, "logps/chosen": -208.68771362304688, "logps/rejected": -178.89479064941406, "loss": 0.2989, "rewards/accuracies": 0.875, "rewards/chosen": 0.2097526341676712, "rewards/margins": 1.836012840270996, "rewards/rejected": -1.6262602806091309, "step": 1514 }, { "epoch": 0.17, "learning_rate": 2.5139880604003273e-07, "logits/chosen": -2.7074971199035645, "logits/rejected": -2.684040069580078, "logps/chosen": -292.5515441894531, "logps/rejected": -375.0087585449219, "loss": 0.3097, "rewards/accuracies": 0.875, "rewards/chosen": 0.033129312098026276, "rewards/margins": 1.973036289215088, "rewards/rejected": -1.9399070739746094, "step": 1515 }, { "epoch": 0.17, "learning_rate": 2.513636895704085e-07, "logits/chosen": -2.9524431228637695, "logits/rejected": -2.5515966415405273, "logps/chosen": -151.3790283203125, "logps/rejected": -129.77256774902344, "loss": 0.531, "rewards/accuracies": 0.5, "rewards/chosen": -0.036276623606681824, "rewards/margins": 0.5077760815620422, "rewards/rejected": -0.5440527200698853, "step": 1516 }, { "epoch": 0.17, "learning_rate": 2.513285731007843e-07, "logits/chosen": -2.5420925617218018, "logits/rejected": -2.2470641136169434, "logps/chosen": -178.55606079101562, "logps/rejected": -325.8009033203125, "loss": 0.4894, "rewards/accuracies": 0.75, "rewards/chosen": -0.24279308319091797, "rewards/margins": 0.7724299430847168, "rewards/rejected": -1.0152230262756348, "step": 1517 }, { "epoch": 0.17, "learning_rate": 2.5129345663116e-07, "logits/chosen": -2.867743730545044, "logits/rejected": -3.0709424018859863, "logps/chosen": -355.54925537109375, "logps/rejected": -321.88739013671875, "loss": 0.2218, "rewards/accuracies": 0.875, "rewards/chosen": 0.3604843020439148, "rewards/margins": 2.753653049468994, "rewards/rejected": -2.3931686878204346, "step": 1518 }, { "epoch": 0.18, "learning_rate": 2.5125834016153575e-07, "logits/chosen": -3.983919382095337, "logits/rejected": -3.6354708671569824, "logps/chosen": -249.71902465820312, "logps/rejected": -216.30906677246094, "loss": 0.5517, "rewards/accuracies": 0.875, "rewards/chosen": 0.17475387454032898, "rewards/margins": 1.0399212837219238, "rewards/rejected": -0.8651673793792725, "step": 1519 }, { "epoch": 0.18, "learning_rate": 2.512232236919115e-07, "logits/chosen": -2.983504295349121, "logits/rejected": -3.1349196434020996, "logps/chosen": -358.331787109375, "logps/rejected": -359.50201416015625, "loss": 0.6995, "rewards/accuracies": 0.75, "rewards/chosen": -0.90455561876297, "rewards/margins": 0.47947126626968384, "rewards/rejected": -1.3840270042419434, "step": 1520 }, { "epoch": 0.18, "learning_rate": 2.5118810722228726e-07, "logits/chosen": -3.6364874839782715, "logits/rejected": -3.4873604774475098, "logps/chosen": -223.56915283203125, "logps/rejected": -215.62892150878906, "loss": 0.8344, "rewards/accuracies": 0.75, "rewards/chosen": -0.16205745935440063, "rewards/margins": 0.4507281184196472, "rewards/rejected": -0.6127855777740479, "step": 1521 }, { "epoch": 0.18, "learning_rate": 2.51152990752663e-07, "logits/chosen": -2.818822145462036, "logits/rejected": -2.511584520339966, "logps/chosen": -177.04193115234375, "logps/rejected": -202.612060546875, "loss": 0.6728, "rewards/accuracies": 0.625, "rewards/chosen": -0.10911092162132263, "rewards/margins": 0.38139206171035767, "rewards/rejected": -0.4905029535293579, "step": 1522 }, { "epoch": 0.18, "learning_rate": 2.511178742830387e-07, "logits/chosen": -3.6642038822174072, "logits/rejected": -3.6444716453552246, "logps/chosen": -151.7933807373047, "logps/rejected": -222.90638732910156, "loss": 0.5246, "rewards/accuracies": 0.75, "rewards/chosen": -0.12398136407136917, "rewards/margins": 0.6475263237953186, "rewards/rejected": -0.7715076208114624, "step": 1523 }, { "epoch": 0.18, "learning_rate": 2.5108275781341447e-07, "logits/chosen": -2.939218282699585, "logits/rejected": -2.7400050163269043, "logps/chosen": -351.4151306152344, "logps/rejected": -290.5722351074219, "loss": 0.3827, "rewards/accuracies": 0.875, "rewards/chosen": 0.11940590292215347, "rewards/margins": 1.3269720077514648, "rewards/rejected": -1.207566261291504, "step": 1524 }, { "epoch": 0.18, "learning_rate": 2.510476413437902e-07, "logits/chosen": -3.1605799198150635, "logits/rejected": -2.9916059970855713, "logps/chosen": -201.02120971679688, "logps/rejected": -244.4366455078125, "loss": 0.6615, "rewards/accuracies": 0.625, "rewards/chosen": -0.341769814491272, "rewards/margins": 0.650853157043457, "rewards/rejected": -0.992622971534729, "step": 1525 }, { "epoch": 0.18, "learning_rate": 2.5101252487416597e-07, "logits/chosen": -3.2518396377563477, "logits/rejected": -2.992037296295166, "logps/chosen": -319.9474182128906, "logps/rejected": -218.11830139160156, "loss": 0.4058, "rewards/accuracies": 1.0, "rewards/chosen": -0.44514983892440796, "rewards/margins": 0.8046483993530273, "rewards/rejected": -1.2497981786727905, "step": 1526 }, { "epoch": 0.18, "learning_rate": 2.5097740840454173e-07, "logits/chosen": -3.189215898513794, "logits/rejected": -3.2065792083740234, "logps/chosen": -262.51348876953125, "logps/rejected": -177.74945068359375, "loss": 0.4016, "rewards/accuracies": 0.875, "rewards/chosen": -0.0813589096069336, "rewards/margins": 1.019492268562317, "rewards/rejected": -1.100851058959961, "step": 1527 }, { "epoch": 0.18, "learning_rate": 2.5094229193491743e-07, "logits/chosen": -3.1080856323242188, "logits/rejected": -3.2889695167541504, "logps/chosen": -206.8444061279297, "logps/rejected": -307.3691101074219, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": 0.054033853113651276, "rewards/margins": 2.93294358253479, "rewards/rejected": -2.8789095878601074, "step": 1528 }, { "epoch": 0.18, "learning_rate": 2.5090717546529323e-07, "logits/chosen": -2.9138283729553223, "logits/rejected": -3.11415958404541, "logps/chosen": -203.69384765625, "logps/rejected": -273.83673095703125, "loss": 0.3313, "rewards/accuracies": 1.0, "rewards/chosen": 0.5592360496520996, "rewards/margins": 1.452459692955017, "rewards/rejected": -0.8932234644889832, "step": 1529 }, { "epoch": 0.18, "learning_rate": 2.50872058995669e-07, "logits/chosen": -3.6793699264526367, "logits/rejected": -3.726012706756592, "logps/chosen": -249.152587890625, "logps/rejected": -315.2601013183594, "loss": 0.1541, "rewards/accuracies": 0.875, "rewards/chosen": 0.61383056640625, "rewards/margins": 3.3448610305786133, "rewards/rejected": -2.7310304641723633, "step": 1530 }, { "epoch": 0.18, "learning_rate": 2.508369425260447e-07, "logits/chosen": -3.5303189754486084, "logits/rejected": -3.0636942386627197, "logps/chosen": -327.10516357421875, "logps/rejected": -171.04747009277344, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": 0.6687989234924316, "rewards/margins": 2.985759735107422, "rewards/rejected": -2.316960334777832, "step": 1531 }, { "epoch": 0.18, "learning_rate": 2.5080182605642044e-07, "logits/chosen": -3.8624749183654785, "logits/rejected": -3.7587456703186035, "logps/chosen": -182.19215393066406, "logps/rejected": -177.85023498535156, "loss": 0.472, "rewards/accuracies": 0.75, "rewards/chosen": 0.23013362288475037, "rewards/margins": 0.9002361297607422, "rewards/rejected": -0.6701024770736694, "step": 1532 }, { "epoch": 0.18, "learning_rate": 2.507667095867962e-07, "logits/chosen": -2.931903839111328, "logits/rejected": -2.8240795135498047, "logps/chosen": -230.96624755859375, "logps/rejected": -189.26007080078125, "loss": 0.4907, "rewards/accuracies": 0.875, "rewards/chosen": -0.034889012575149536, "rewards/margins": 1.2451589107513428, "rewards/rejected": -1.28004789352417, "step": 1533 }, { "epoch": 0.18, "learning_rate": 2.5073159311717195e-07, "logits/chosen": -3.2453575134277344, "logits/rejected": -3.176525115966797, "logps/chosen": -328.6094665527344, "logps/rejected": -371.0775146484375, "loss": 0.5481, "rewards/accuracies": 0.75, "rewards/chosen": -0.15590377151966095, "rewards/margins": 0.8421227931976318, "rewards/rejected": -0.9980265498161316, "step": 1534 }, { "epoch": 0.18, "learning_rate": 2.506964766475477e-07, "logits/chosen": -3.648616313934326, "logits/rejected": -3.693873405456543, "logps/chosen": -206.76675415039062, "logps/rejected": -203.88589477539062, "loss": 0.3627, "rewards/accuracies": 0.875, "rewards/chosen": -0.25549715757369995, "rewards/margins": 1.6640931367874146, "rewards/rejected": -1.9195902347564697, "step": 1535 }, { "epoch": 0.18, "learning_rate": 2.506613601779234e-07, "logits/chosen": -2.862778902053833, "logits/rejected": -3.0783872604370117, "logps/chosen": -160.05374145507812, "logps/rejected": -199.77247619628906, "loss": 0.3634, "rewards/accuracies": 0.75, "rewards/chosen": 0.11497992277145386, "rewards/margins": 1.8958029747009277, "rewards/rejected": -1.7808231115341187, "step": 1536 }, { "epoch": 0.18, "learning_rate": 2.5062624370829916e-07, "logits/chosen": -2.9934234619140625, "logits/rejected": -3.3008384704589844, "logps/chosen": -265.7355651855469, "logps/rejected": -254.08668518066406, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": 0.06950540840625763, "rewards/margins": 1.3061575889587402, "rewards/rejected": -1.236652135848999, "step": 1537 }, { "epoch": 0.18, "learning_rate": 2.505911272386749e-07, "logits/chosen": -3.3784804344177246, "logits/rejected": -3.5589330196380615, "logps/chosen": -198.95965576171875, "logps/rejected": -218.23545837402344, "loss": 0.414, "rewards/accuracies": 0.75, "rewards/chosen": 0.016104191541671753, "rewards/margins": 1.0998241901397705, "rewards/rejected": -1.0837198495864868, "step": 1538 }, { "epoch": 0.18, "learning_rate": 2.5055601076905067e-07, "logits/chosen": -3.2002041339874268, "logits/rejected": -2.983340263366699, "logps/chosen": -346.22802734375, "logps/rejected": -166.5175323486328, "loss": 0.4991, "rewards/accuracies": 0.75, "rewards/chosen": 0.21396848559379578, "rewards/margins": 0.7792195081710815, "rewards/rejected": -0.5652509927749634, "step": 1539 }, { "epoch": 0.18, "learning_rate": 2.505208942994264e-07, "logits/chosen": -2.4443418979644775, "logits/rejected": -2.3749611377716064, "logps/chosen": -231.0518035888672, "logps/rejected": -248.08737182617188, "loss": 0.3561, "rewards/accuracies": 0.75, "rewards/chosen": 0.1508564054965973, "rewards/margins": 1.5054519176483154, "rewards/rejected": -1.354595422744751, "step": 1540 }, { "epoch": 0.18, "learning_rate": 2.504857778298022e-07, "logits/chosen": -2.6610379219055176, "logits/rejected": -3.072960615158081, "logps/chosen": -233.3808135986328, "logps/rejected": -246.25082397460938, "loss": 0.3089, "rewards/accuracies": 1.0, "rewards/chosen": -0.012616094201803207, "rewards/margins": 1.4887852668762207, "rewards/rejected": -1.5014013051986694, "step": 1541 }, { "epoch": 0.18, "learning_rate": 2.5045066136017793e-07, "logits/chosen": -2.637578248977661, "logits/rejected": -2.898172378540039, "logps/chosen": -214.5455322265625, "logps/rejected": -280.34515380859375, "loss": 0.3984, "rewards/accuracies": 0.875, "rewards/chosen": -0.4203321635723114, "rewards/margins": 1.1077349185943604, "rewards/rejected": -1.5280669927597046, "step": 1542 }, { "epoch": 0.18, "learning_rate": 2.504155448905537e-07, "logits/chosen": -2.4137964248657227, "logits/rejected": -2.6297972202301025, "logps/chosen": -272.5074462890625, "logps/rejected": -201.68695068359375, "loss": 0.4451, "rewards/accuracies": 0.75, "rewards/chosen": 0.027765393257141113, "rewards/margins": 1.4940557479858398, "rewards/rejected": -1.4662903547286987, "step": 1543 }, { "epoch": 0.18, "learning_rate": 2.503804284209294e-07, "logits/chosen": -3.3724944591522217, "logits/rejected": -3.337322235107422, "logps/chosen": -229.9352264404297, "logps/rejected": -249.21282958984375, "loss": 0.418, "rewards/accuracies": 0.75, "rewards/chosen": -0.5513677597045898, "rewards/margins": 1.8722014427185059, "rewards/rejected": -2.4235692024230957, "step": 1544 }, { "epoch": 0.18, "learning_rate": 2.5034531195130514e-07, "logits/chosen": -3.412881374359131, "logits/rejected": -3.261493682861328, "logps/chosen": -129.72894287109375, "logps/rejected": -200.30181884765625, "loss": 0.4424, "rewards/accuracies": 0.75, "rewards/chosen": -0.1342257261276245, "rewards/margins": 1.9664995670318604, "rewards/rejected": -2.1007254123687744, "step": 1545 }, { "epoch": 0.18, "learning_rate": 2.503101954816809e-07, "logits/chosen": -2.6883387565612793, "logits/rejected": -2.8215625286102295, "logps/chosen": -202.64227294921875, "logps/rejected": -136.78631591796875, "loss": 0.5223, "rewards/accuracies": 0.625, "rewards/chosen": 0.06284372508525848, "rewards/margins": 0.5342840552330017, "rewards/rejected": -0.4714403748512268, "step": 1546 }, { "epoch": 0.18, "learning_rate": 2.5027507901205665e-07, "logits/chosen": -3.4476959705352783, "logits/rejected": -3.2603530883789062, "logps/chosen": -205.8194122314453, "logps/rejected": -305.54925537109375, "loss": 0.3538, "rewards/accuracies": 0.875, "rewards/chosen": 0.11508862674236298, "rewards/margins": 1.584865689277649, "rewards/rejected": -1.46977698802948, "step": 1547 }, { "epoch": 0.18, "learning_rate": 2.502399625424324e-07, "logits/chosen": -3.1585943698883057, "logits/rejected": -3.5449790954589844, "logps/chosen": -209.16317749023438, "logps/rejected": -332.84423828125, "loss": 0.26, "rewards/accuracies": 1.0, "rewards/chosen": 0.2629750967025757, "rewards/margins": 1.6706701517105103, "rewards/rejected": -1.4076950550079346, "step": 1548 }, { "epoch": 0.18, "learning_rate": 2.5020484607280815e-07, "logits/chosen": -3.0686416625976562, "logits/rejected": -3.214515447616577, "logps/chosen": -169.7054443359375, "logps/rejected": -135.6467742919922, "loss": 0.7111, "rewards/accuracies": 0.5, "rewards/chosen": -0.7438569068908691, "rewards/margins": 0.21290910243988037, "rewards/rejected": -0.9567660689353943, "step": 1549 }, { "epoch": 0.18, "learning_rate": 2.5016972960318385e-07, "logits/chosen": -3.47174334526062, "logits/rejected": -3.284346342086792, "logps/chosen": -297.818359375, "logps/rejected": -281.1500549316406, "loss": 0.8368, "rewards/accuracies": 0.5, "rewards/chosen": 0.06337091326713562, "rewards/margins": 0.37006694078445435, "rewards/rejected": -0.3066960871219635, "step": 1550 }, { "epoch": 0.18, "learning_rate": 2.5013461313355966e-07, "logits/chosen": -3.2129576206207275, "logits/rejected": -3.3759443759918213, "logps/chosen": -330.897216796875, "logps/rejected": -324.5155334472656, "loss": 0.2553, "rewards/accuracies": 0.875, "rewards/chosen": 0.7761394381523132, "rewards/margins": 2.4174435138702393, "rewards/rejected": -1.6413041353225708, "step": 1551 }, { "epoch": 0.18, "learning_rate": 2.5009949666393536e-07, "logits/chosen": -3.4097652435302734, "logits/rejected": -3.7561657428741455, "logps/chosen": -240.09689331054688, "logps/rejected": -292.41033935546875, "loss": 0.6568, "rewards/accuracies": 0.625, "rewards/chosen": -0.19328394532203674, "rewards/margins": 0.9810360670089722, "rewards/rejected": -1.1743199825286865, "step": 1552 }, { "epoch": 0.18, "learning_rate": 2.500643801943111e-07, "logits/chosen": -3.154953956604004, "logits/rejected": -3.1746201515197754, "logps/chosen": -223.4775390625, "logps/rejected": -233.44090270996094, "loss": 0.6542, "rewards/accuracies": 0.75, "rewards/chosen": 0.03741864860057831, "rewards/margins": 0.4985285997390747, "rewards/rejected": -0.4611099362373352, "step": 1553 }, { "epoch": 0.18, "learning_rate": 2.5002926372468687e-07, "logits/chosen": -2.7029380798339844, "logits/rejected": -3.0414600372314453, "logps/chosen": -222.06947326660156, "logps/rejected": -333.65850830078125, "loss": 0.4251, "rewards/accuracies": 0.75, "rewards/chosen": 0.06763535737991333, "rewards/margins": 1.301175832748413, "rewards/rejected": -1.233540415763855, "step": 1554 }, { "epoch": 0.18, "learning_rate": 2.499941472550626e-07, "logits/chosen": -2.7445449829101562, "logits/rejected": -2.816120147705078, "logps/chosen": -183.89080810546875, "logps/rejected": -212.0049285888672, "loss": 0.4289, "rewards/accuracies": 0.75, "rewards/chosen": 0.00840894877910614, "rewards/margins": 1.3367893695831299, "rewards/rejected": -1.3283804655075073, "step": 1555 }, { "epoch": 0.18, "learning_rate": 2.499590307854384e-07, "logits/chosen": -3.7259578704833984, "logits/rejected": -3.061728000640869, "logps/chosen": -440.28936767578125, "logps/rejected": -236.62127685546875, "loss": 0.4553, "rewards/accuracies": 0.75, "rewards/chosen": -0.2671167254447937, "rewards/margins": 1.394223690032959, "rewards/rejected": -1.661340594291687, "step": 1556 }, { "epoch": 0.18, "learning_rate": 2.4992391431581413e-07, "logits/chosen": -3.3398337364196777, "logits/rejected": -3.5374815464019775, "logps/chosen": -210.3216552734375, "logps/rejected": -361.6911315917969, "loss": 0.2462, "rewards/accuracies": 0.875, "rewards/chosen": 0.21619391441345215, "rewards/margins": 2.6669905185699463, "rewards/rejected": -2.450796365737915, "step": 1557 }, { "epoch": 0.18, "learning_rate": 2.4988879784618983e-07, "logits/chosen": -3.350187301635742, "logits/rejected": -3.329085111618042, "logps/chosen": -248.35025024414062, "logps/rejected": -478.1139831542969, "loss": 0.6189, "rewards/accuracies": 0.875, "rewards/chosen": -0.20270265638828278, "rewards/margins": 1.2441428899765015, "rewards/rejected": -1.446845531463623, "step": 1558 }, { "epoch": 0.18, "learning_rate": 2.498536813765656e-07, "logits/chosen": -2.966024160385132, "logits/rejected": -2.862165689468384, "logps/chosen": -133.44998168945312, "logps/rejected": -272.40924072265625, "loss": 0.3241, "rewards/accuracies": 0.75, "rewards/chosen": 0.35100653767585754, "rewards/margins": 1.519694447517395, "rewards/rejected": -1.1686878204345703, "step": 1559 }, { "epoch": 0.18, "learning_rate": 2.4981856490694134e-07, "logits/chosen": -2.8258790969848633, "logits/rejected": -2.789914131164551, "logps/chosen": -287.9441223144531, "logps/rejected": -319.75823974609375, "loss": 0.3991, "rewards/accuracies": 0.875, "rewards/chosen": -0.08661016821861267, "rewards/margins": 1.0961226224899292, "rewards/rejected": -1.1827328205108643, "step": 1560 }, { "epoch": 0.18, "learning_rate": 2.497834484373171e-07, "logits/chosen": -2.6846508979797363, "logits/rejected": -3.1744747161865234, "logps/chosen": -316.5074157714844, "logps/rejected": -177.9369354248047, "loss": 0.6516, "rewards/accuracies": 0.5, "rewards/chosen": -0.5177503824234009, "rewards/margins": 0.621163010597229, "rewards/rejected": -1.1389133930206299, "step": 1561 }, { "epoch": 0.18, "learning_rate": 2.4974833196769285e-07, "logits/chosen": -3.4028801918029785, "logits/rejected": -3.491983413696289, "logps/chosen": -88.90687561035156, "logps/rejected": -194.2624969482422, "loss": 0.3695, "rewards/accuracies": 0.625, "rewards/chosen": 0.047424376010894775, "rewards/margins": 1.8772265911102295, "rewards/rejected": -1.8298022747039795, "step": 1562 }, { "epoch": 0.18, "learning_rate": 2.4971321549806855e-07, "logits/chosen": -3.5973734855651855, "logits/rejected": -3.220582962036133, "logps/chosen": -420.0931396484375, "logps/rejected": -308.4820861816406, "loss": 0.4165, "rewards/accuracies": 0.75, "rewards/chosen": -0.1363658308982849, "rewards/margins": 1.0620437860488892, "rewards/rejected": -1.1984095573425293, "step": 1563 }, { "epoch": 0.18, "learning_rate": 2.4967809902844435e-07, "logits/chosen": -3.2502803802490234, "logits/rejected": -3.1472179889678955, "logps/chosen": -392.24847412109375, "logps/rejected": -310.6517333984375, "loss": 0.4379, "rewards/accuracies": 0.75, "rewards/chosen": 0.2914094924926758, "rewards/margins": 1.9295068979263306, "rewards/rejected": -1.6380972862243652, "step": 1564 }, { "epoch": 0.18, "learning_rate": 2.4964298255882006e-07, "logits/chosen": -3.1605935096740723, "logits/rejected": -2.9460926055908203, "logps/chosen": -319.314208984375, "logps/rejected": -148.277587890625, "loss": 0.3422, "rewards/accuracies": 0.75, "rewards/chosen": 0.2379521131515503, "rewards/margins": 1.936168909072876, "rewards/rejected": -1.6982166767120361, "step": 1565 }, { "epoch": 0.18, "learning_rate": 2.496078660891958e-07, "logits/chosen": -3.3417553901672363, "logits/rejected": -3.0829310417175293, "logps/chosen": -193.19308471679688, "logps/rejected": -207.80809020996094, "loss": 0.7751, "rewards/accuracies": 0.625, "rewards/chosen": -0.3709440231323242, "rewards/margins": 1.8440539836883545, "rewards/rejected": -2.214998245239258, "step": 1566 }, { "epoch": 0.18, "learning_rate": 2.4957274961957156e-07, "logits/chosen": -2.997227668762207, "logits/rejected": -2.9109063148498535, "logps/chosen": -351.82177734375, "logps/rejected": -340.80035400390625, "loss": 0.5469, "rewards/accuracies": 0.75, "rewards/chosen": -0.24442750215530396, "rewards/margins": 0.815355658531189, "rewards/rejected": -1.0597832202911377, "step": 1567 }, { "epoch": 0.18, "learning_rate": 2.495376331499473e-07, "logits/chosen": -2.496757745742798, "logits/rejected": -2.7690396308898926, "logps/chosen": -313.14971923828125, "logps/rejected": -284.92803955078125, "loss": 0.6893, "rewards/accuracies": 0.875, "rewards/chosen": 0.0612785667181015, "rewards/margins": 0.6400415897369385, "rewards/rejected": -0.5787630081176758, "step": 1568 }, { "epoch": 0.18, "learning_rate": 2.4950251668032307e-07, "logits/chosen": -2.715089797973633, "logits/rejected": -2.7075071334838867, "logps/chosen": -352.3373718261719, "logps/rejected": -322.6280517578125, "loss": 0.671, "rewards/accuracies": 0.5, "rewards/chosen": -0.25847962498664856, "rewards/margins": 0.31642913818359375, "rewards/rejected": -0.5749087929725647, "step": 1569 }, { "epoch": 0.18, "learning_rate": 2.494674002106988e-07, "logits/chosen": -3.4108870029449463, "logits/rejected": -3.565894365310669, "logps/chosen": -241.74029541015625, "logps/rejected": -273.7139587402344, "loss": 0.5391, "rewards/accuracies": 0.625, "rewards/chosen": -0.24049139022827148, "rewards/margins": 1.1604478359222412, "rewards/rejected": -1.4009393453598022, "step": 1570 }, { "epoch": 0.18, "learning_rate": 2.494322837410745e-07, "logits/chosen": -2.145329475402832, "logits/rejected": -2.1749789714813232, "logps/chosen": -238.5030517578125, "logps/rejected": -188.67239379882812, "loss": 0.6515, "rewards/accuracies": 0.75, "rewards/chosen": -0.09605341404676437, "rewards/margins": 0.7354627251625061, "rewards/rejected": -0.8315160274505615, "step": 1571 }, { "epoch": 0.18, "learning_rate": 2.493971672714503e-07, "logits/chosen": -2.934159755706787, "logits/rejected": -2.9304256439208984, "logps/chosen": -255.47142028808594, "logps/rejected": -272.83392333984375, "loss": 0.3056, "rewards/accuracies": 0.875, "rewards/chosen": -0.037884414196014404, "rewards/margins": 1.394587516784668, "rewards/rejected": -1.4324719905853271, "step": 1572 }, { "epoch": 0.18, "learning_rate": 2.4936205080182603e-07, "logits/chosen": -2.2936620712280273, "logits/rejected": -2.2601540088653564, "logps/chosen": -346.5345153808594, "logps/rejected": -327.0447998046875, "loss": 0.2829, "rewards/accuracies": 1.0, "rewards/chosen": 0.5475779175758362, "rewards/margins": 1.8047429323196411, "rewards/rejected": -1.2571649551391602, "step": 1573 }, { "epoch": 0.18, "learning_rate": 2.493269343322018e-07, "logits/chosen": -2.484318971633911, "logits/rejected": -2.7793354988098145, "logps/chosen": -204.45579528808594, "logps/rejected": -228.13340759277344, "loss": 0.4569, "rewards/accuracies": 0.5, "rewards/chosen": 0.0884358137845993, "rewards/margins": 0.9228543639183044, "rewards/rejected": -0.8344185948371887, "step": 1574 }, { "epoch": 0.18, "learning_rate": 2.4929181786257754e-07, "logits/chosen": -2.2387490272521973, "logits/rejected": -2.1321918964385986, "logps/chosen": -168.62783813476562, "logps/rejected": -255.7255859375, "loss": 0.4917, "rewards/accuracies": 0.75, "rewards/chosen": 0.05357038602232933, "rewards/margins": 0.6330357193946838, "rewards/rejected": -0.5794653296470642, "step": 1575 }, { "epoch": 0.18, "learning_rate": 2.492567013929533e-07, "logits/chosen": -3.2566893100738525, "logits/rejected": -3.1014766693115234, "logps/chosen": -192.5931854248047, "logps/rejected": -303.3433532714844, "loss": 0.2376, "rewards/accuracies": 1.0, "rewards/chosen": 0.3248193562030792, "rewards/margins": 1.9190062284469604, "rewards/rejected": -1.5941870212554932, "step": 1576 }, { "epoch": 0.18, "learning_rate": 2.4922158492332905e-07, "logits/chosen": -3.010411500930786, "logits/rejected": -2.990661144256592, "logps/chosen": -134.6187286376953, "logps/rejected": -259.3593444824219, "loss": 0.43, "rewards/accuracies": 0.75, "rewards/chosen": 0.23420998454093933, "rewards/margins": 1.313028335571289, "rewards/rejected": -1.0788183212280273, "step": 1577 }, { "epoch": 0.18, "learning_rate": 2.491864684537048e-07, "logits/chosen": -2.957808017730713, "logits/rejected": -2.910841226577759, "logps/chosen": -189.14065551757812, "logps/rejected": -182.82687377929688, "loss": 0.6245, "rewards/accuracies": 0.5, "rewards/chosen": -0.5441092848777771, "rewards/margins": 1.0646005868911743, "rewards/rejected": -1.6087098121643066, "step": 1578 }, { "epoch": 0.18, "learning_rate": 2.491513519840805e-07, "logits/chosen": -3.1919941902160645, "logits/rejected": -2.9025208950042725, "logps/chosen": -315.1257019042969, "logps/rejected": -395.47076416015625, "loss": 0.598, "rewards/accuracies": 0.625, "rewards/chosen": 0.09761975705623627, "rewards/margins": 1.0324183702468872, "rewards/rejected": -0.934798538684845, "step": 1579 }, { "epoch": 0.18, "learning_rate": 2.4911623551445626e-07, "logits/chosen": -4.053675651550293, "logits/rejected": -4.055758476257324, "logps/chosen": -166.01144409179688, "logps/rejected": -144.49032592773438, "loss": 0.385, "rewards/accuracies": 0.75, "rewards/chosen": 0.32841819524765015, "rewards/margins": 1.7198209762573242, "rewards/rejected": -1.3914028406143188, "step": 1580 }, { "epoch": 0.18, "learning_rate": 2.49081119044832e-07, "logits/chosen": -2.8934061527252197, "logits/rejected": -3.2023355960845947, "logps/chosen": -162.78872680664062, "logps/rejected": -136.22914123535156, "loss": 0.5985, "rewards/accuracies": 0.75, "rewards/chosen": -0.34314340353012085, "rewards/margins": 1.3626636266708374, "rewards/rejected": -1.705807089805603, "step": 1581 }, { "epoch": 0.18, "learning_rate": 2.4904600257520777e-07, "logits/chosen": -3.2262425422668457, "logits/rejected": -3.333815097808838, "logps/chosen": -275.0162353515625, "logps/rejected": -268.2195129394531, "loss": 0.5603, "rewards/accuracies": 0.75, "rewards/chosen": -0.4277450442314148, "rewards/margins": 1.2416092157363892, "rewards/rejected": -1.6693542003631592, "step": 1582 }, { "epoch": 0.18, "learning_rate": 2.490108861055835e-07, "logits/chosen": -3.454385757446289, "logits/rejected": -2.9647676944732666, "logps/chosen": -381.7331848144531, "logps/rejected": -275.11383056640625, "loss": 0.3108, "rewards/accuracies": 0.875, "rewards/chosen": -0.1144372969865799, "rewards/margins": 1.9118257761001587, "rewards/rejected": -2.0262629985809326, "step": 1583 }, { "epoch": 0.18, "learning_rate": 2.489757696359592e-07, "logits/chosen": -2.8361501693725586, "logits/rejected": -3.033151149749756, "logps/chosen": -205.57240295410156, "logps/rejected": -195.34246826171875, "loss": 0.4334, "rewards/accuracies": 0.75, "rewards/chosen": 0.1300015151500702, "rewards/margins": 1.1216909885406494, "rewards/rejected": -0.9916895031929016, "step": 1584 }, { "epoch": 0.18, "learning_rate": 2.4894065316633503e-07, "logits/chosen": -3.0783472061157227, "logits/rejected": -2.981541395187378, "logps/chosen": -463.94000244140625, "logps/rejected": -295.4822692871094, "loss": 0.2753, "rewards/accuracies": 0.875, "rewards/chosen": -0.05068501830101013, "rewards/margins": 1.8550400733947754, "rewards/rejected": -1.905725121498108, "step": 1585 }, { "epoch": 0.18, "learning_rate": 2.489055366967108e-07, "logits/chosen": -3.156273126602173, "logits/rejected": -3.4516024589538574, "logps/chosen": -339.36834716796875, "logps/rejected": -225.49777221679688, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": 0.06556139141321182, "rewards/margins": 1.5363905429840088, "rewards/rejected": -1.4708291292190552, "step": 1586 }, { "epoch": 0.18, "learning_rate": 2.488704202270865e-07, "logits/chosen": -2.662827253341675, "logits/rejected": -2.6474499702453613, "logps/chosen": -272.6754150390625, "logps/rejected": -264.35333251953125, "loss": 0.5074, "rewards/accuracies": 0.875, "rewards/chosen": 0.8834242224693298, "rewards/margins": 0.5176845192909241, "rewards/rejected": 0.3657396137714386, "step": 1587 }, { "epoch": 0.18, "learning_rate": 2.4883530375746224e-07, "logits/chosen": -3.398542881011963, "logits/rejected": -3.3827009201049805, "logps/chosen": -369.9556579589844, "logps/rejected": -328.391845703125, "loss": 0.432, "rewards/accuracies": 0.75, "rewards/chosen": 0.17286589741706848, "rewards/margins": 0.9410245418548584, "rewards/rejected": -0.7681586742401123, "step": 1588 }, { "epoch": 0.18, "learning_rate": 2.48800187287838e-07, "logits/chosen": -3.1807615756988525, "logits/rejected": -3.209909439086914, "logps/chosen": -256.842529296875, "logps/rejected": -231.21083068847656, "loss": 0.3386, "rewards/accuracies": 0.75, "rewards/chosen": 0.3647618293762207, "rewards/margins": 2.1404671669006348, "rewards/rejected": -1.775705099105835, "step": 1589 }, { "epoch": 0.18, "learning_rate": 2.4876507081821374e-07, "logits/chosen": -3.210204839706421, "logits/rejected": -3.1936206817626953, "logps/chosen": -160.5758056640625, "logps/rejected": -138.31202697753906, "loss": 0.3473, "rewards/accuracies": 1.0, "rewards/chosen": 0.6779247522354126, "rewards/margins": 1.426550030708313, "rewards/rejected": -0.7486251592636108, "step": 1590 }, { "epoch": 0.18, "learning_rate": 2.487299543485895e-07, "logits/chosen": -2.3738036155700684, "logits/rejected": -2.468855142593384, "logps/chosen": -379.32012939453125, "logps/rejected": -332.70819091796875, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": 0.32497167587280273, "rewards/margins": 0.8997296094894409, "rewards/rejected": -0.5747578740119934, "step": 1591 }, { "epoch": 0.18, "learning_rate": 2.486948378789652e-07, "logits/chosen": -3.562161684036255, "logits/rejected": -3.0826175212860107, "logps/chosen": -251.93624877929688, "logps/rejected": -192.786865234375, "loss": 0.5886, "rewards/accuracies": 0.75, "rewards/chosen": -0.6212844848632812, "rewards/margins": 0.7748706340789795, "rewards/rejected": -1.3961551189422607, "step": 1592 }, { "epoch": 0.18, "learning_rate": 2.4865972140934095e-07, "logits/chosen": -2.4381299018859863, "logits/rejected": -2.9638969898223877, "logps/chosen": -302.59332275390625, "logps/rejected": -299.6400451660156, "loss": 0.3321, "rewards/accuracies": 0.875, "rewards/chosen": 0.3786783218383789, "rewards/margins": 2.049281120300293, "rewards/rejected": -1.670602798461914, "step": 1593 }, { "epoch": 0.18, "learning_rate": 2.486246049397167e-07, "logits/chosen": -2.9776320457458496, "logits/rejected": -2.75177264213562, "logps/chosen": -225.4220428466797, "logps/rejected": -150.24493408203125, "loss": 0.4986, "rewards/accuracies": 0.75, "rewards/chosen": -0.08923830091953278, "rewards/margins": 0.8605561256408691, "rewards/rejected": -0.9497944712638855, "step": 1594 }, { "epoch": 0.18, "learning_rate": 2.4858948847009246e-07, "logits/chosen": -2.8370304107666016, "logits/rejected": -2.55305552482605, "logps/chosen": -417.40911865234375, "logps/rejected": -214.65200805664062, "loss": 0.2867, "rewards/accuracies": 0.875, "rewards/chosen": -0.12078927457332611, "rewards/margins": 1.4924275875091553, "rewards/rejected": -1.613216757774353, "step": 1595 }, { "epoch": 0.18, "learning_rate": 2.485543720004682e-07, "logits/chosen": -2.8922176361083984, "logits/rejected": -2.8392605781555176, "logps/chosen": -179.53965759277344, "logps/rejected": -193.11843872070312, "loss": 0.6272, "rewards/accuracies": 0.625, "rewards/chosen": -0.3021852970123291, "rewards/margins": 1.0617811679840088, "rewards/rejected": -1.363966464996338, "step": 1596 }, { "epoch": 0.18, "learning_rate": 2.485192555308439e-07, "logits/chosen": -2.915086269378662, "logits/rejected": -3.243215560913086, "logps/chosen": -136.1141357421875, "logps/rejected": -206.26516723632812, "loss": 0.2515, "rewards/accuracies": 1.0, "rewards/chosen": 0.028124883770942688, "rewards/margins": 1.6708011627197266, "rewards/rejected": -1.6426763534545898, "step": 1597 }, { "epoch": 0.18, "learning_rate": 2.484841390612197e-07, "logits/chosen": -2.5279502868652344, "logits/rejected": -2.3170130252838135, "logps/chosen": -281.1407165527344, "logps/rejected": -283.10491943359375, "loss": 0.6542, "rewards/accuracies": 0.625, "rewards/chosen": -0.5464170575141907, "rewards/margins": 0.8834316730499268, "rewards/rejected": -1.4298487901687622, "step": 1598 }, { "epoch": 0.18, "learning_rate": 2.484490225915955e-07, "logits/chosen": -3.3838205337524414, "logits/rejected": -3.777003288269043, "logps/chosen": -140.44464111328125, "logps/rejected": -234.48382568359375, "loss": 0.4196, "rewards/accuracies": 0.75, "rewards/chosen": 0.33067017793655396, "rewards/margins": 1.441359281539917, "rewards/rejected": -1.1106890439987183, "step": 1599 }, { "epoch": 0.18, "learning_rate": 2.484139061219712e-07, "logits/chosen": -2.4268360137939453, "logits/rejected": -2.7971158027648926, "logps/chosen": -474.1029052734375, "logps/rejected": -311.8132019042969, "loss": 0.4138, "rewards/accuracies": 0.75, "rewards/chosen": 0.019878730177879333, "rewards/margins": 1.0621018409729004, "rewards/rejected": -1.0422230958938599, "step": 1600 }, { "epoch": 0.18, "learning_rate": 2.4837878965234693e-07, "logits/chosen": -2.8265023231506348, "logits/rejected": -2.715358018875122, "logps/chosen": -271.5411071777344, "logps/rejected": -215.6396484375, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.40635865926742554, "rewards/margins": 0.5961260795593262, "rewards/rejected": -1.0024847984313965, "step": 1601 }, { "epoch": 0.18, "learning_rate": 2.483436731827227e-07, "logits/chosen": -2.425666570663452, "logits/rejected": -2.435767650604248, "logps/chosen": -401.4701232910156, "logps/rejected": -383.2276306152344, "loss": 0.1583, "rewards/accuracies": 1.0, "rewards/chosen": 0.491716593503952, "rewards/margins": 2.392782688140869, "rewards/rejected": -1.9010661840438843, "step": 1602 }, { "epoch": 0.18, "learning_rate": 2.4830855671309844e-07, "logits/chosen": -3.0405819416046143, "logits/rejected": -3.0422515869140625, "logps/chosen": -208.3442840576172, "logps/rejected": -192.49871826171875, "loss": 0.3668, "rewards/accuracies": 0.875, "rewards/chosen": 0.30194300413131714, "rewards/margins": 1.4936113357543945, "rewards/rejected": -1.191668152809143, "step": 1603 }, { "epoch": 0.18, "learning_rate": 2.482734402434742e-07, "logits/chosen": -2.9584836959838867, "logits/rejected": -2.805340051651001, "logps/chosen": -303.8201904296875, "logps/rejected": -221.5235595703125, "loss": 0.5706, "rewards/accuracies": 0.75, "rewards/chosen": 0.2331658899784088, "rewards/margins": 1.1537485122680664, "rewards/rejected": -0.92058265209198, "step": 1604 }, { "epoch": 0.19, "learning_rate": 2.482383237738499e-07, "logits/chosen": -2.7143802642822266, "logits/rejected": -2.929252862930298, "logps/chosen": -256.7149963378906, "logps/rejected": -242.93460083007812, "loss": 0.6775, "rewards/accuracies": 0.625, "rewards/chosen": -0.491890549659729, "rewards/margins": 0.7323378324508667, "rewards/rejected": -1.2242283821105957, "step": 1605 }, { "epoch": 0.19, "learning_rate": 2.4820320730422565e-07, "logits/chosen": -3.0357983112335205, "logits/rejected": -3.186969041824341, "logps/chosen": -343.0533142089844, "logps/rejected": -258.09930419921875, "loss": 0.4322, "rewards/accuracies": 0.875, "rewards/chosen": -0.07278376817703247, "rewards/margins": 0.7776103019714355, "rewards/rejected": -0.8503940105438232, "step": 1606 }, { "epoch": 0.19, "learning_rate": 2.4816809083460145e-07, "logits/chosen": -2.9929580688476562, "logits/rejected": -2.909958839416504, "logps/chosen": -318.1580810546875, "logps/rejected": -201.66400146484375, "loss": 0.3284, "rewards/accuracies": 0.875, "rewards/chosen": -0.11313905566930771, "rewards/margins": 1.3388831615447998, "rewards/rejected": -1.4520221948623657, "step": 1607 }, { "epoch": 0.19, "learning_rate": 2.4813297436497715e-07, "logits/chosen": -3.3802247047424316, "logits/rejected": -3.0135278701782227, "logps/chosen": -208.70184326171875, "logps/rejected": -133.7020721435547, "loss": 0.5389, "rewards/accuracies": 0.5, "rewards/chosen": -0.3157225549221039, "rewards/margins": 0.6978271007537842, "rewards/rejected": -1.0135496854782104, "step": 1608 }, { "epoch": 0.19, "learning_rate": 2.480978578953529e-07, "logits/chosen": -3.0474600791931152, "logits/rejected": -3.0875656604766846, "logps/chosen": -204.23968505859375, "logps/rejected": -286.5731201171875, "loss": 0.1808, "rewards/accuracies": 0.875, "rewards/chosen": 0.3811301290988922, "rewards/margins": 2.51344633102417, "rewards/rejected": -2.1323161125183105, "step": 1609 }, { "epoch": 0.19, "learning_rate": 2.4806274142572866e-07, "logits/chosen": -3.1642093658447266, "logits/rejected": -3.080878257751465, "logps/chosen": -285.5237121582031, "logps/rejected": -211.30711364746094, "loss": 0.4949, "rewards/accuracies": 0.75, "rewards/chosen": 0.16524535417556763, "rewards/margins": 1.6269410848617554, "rewards/rejected": -1.461695671081543, "step": 1610 }, { "epoch": 0.19, "learning_rate": 2.480276249561044e-07, "logits/chosen": -2.4304065704345703, "logits/rejected": -2.5123777389526367, "logps/chosen": -356.9730529785156, "logps/rejected": -323.14324951171875, "loss": 0.7664, "rewards/accuracies": 0.5, "rewards/chosen": -0.04421408474445343, "rewards/margins": 0.09974081814289093, "rewards/rejected": -0.14395490288734436, "step": 1611 }, { "epoch": 0.19, "learning_rate": 2.4799250848648017e-07, "logits/chosen": -3.4611644744873047, "logits/rejected": -3.5673651695251465, "logps/chosen": -200.72555541992188, "logps/rejected": -238.0050811767578, "loss": 0.1976, "rewards/accuracies": 1.0, "rewards/chosen": -0.037610769271850586, "rewards/margins": 1.9399579763412476, "rewards/rejected": -1.9775688648223877, "step": 1612 }, { "epoch": 0.19, "learning_rate": 2.4795739201685587e-07, "logits/chosen": -2.1919658184051514, "logits/rejected": -2.4560458660125732, "logps/chosen": -412.3431701660156, "logps/rejected": -345.5562744140625, "loss": 0.4905, "rewards/accuracies": 0.625, "rewards/chosen": 0.0391206294298172, "rewards/margins": 0.879487156867981, "rewards/rejected": -0.8403664827346802, "step": 1613 }, { "epoch": 0.19, "learning_rate": 2.479222755472316e-07, "logits/chosen": -3.2937936782836914, "logits/rejected": -3.07669997215271, "logps/chosen": -165.2415008544922, "logps/rejected": -90.95587158203125, "loss": 0.7672, "rewards/accuracies": 0.75, "rewards/chosen": -0.4400079846382141, "rewards/margins": 0.053300559520721436, "rewards/rejected": -0.49330854415893555, "step": 1614 }, { "epoch": 0.19, "learning_rate": 2.478871590776074e-07, "logits/chosen": -3.26701283454895, "logits/rejected": -3.3003578186035156, "logps/chosen": -262.5455322265625, "logps/rejected": -307.493408203125, "loss": 0.2377, "rewards/accuracies": 1.0, "rewards/chosen": 0.2662431597709656, "rewards/margins": 1.801184892654419, "rewards/rejected": -1.5349416732788086, "step": 1615 }, { "epoch": 0.19, "learning_rate": 2.4785204260798313e-07, "logits/chosen": -3.234891891479492, "logits/rejected": -3.027536630630493, "logps/chosen": -121.38128662109375, "logps/rejected": -157.8800506591797, "loss": 1.0565, "rewards/accuracies": 0.75, "rewards/chosen": -0.5487164258956909, "rewards/margins": 0.36075013875961304, "rewards/rejected": -0.9094666242599487, "step": 1616 }, { "epoch": 0.19, "learning_rate": 2.478169261383589e-07, "logits/chosen": -3.284700393676758, "logits/rejected": -3.5850830078125, "logps/chosen": -282.3678894042969, "logps/rejected": -434.3486328125, "loss": 0.2356, "rewards/accuracies": 0.875, "rewards/chosen": -0.47401413321495056, "rewards/margins": 2.741239070892334, "rewards/rejected": -3.2152533531188965, "step": 1617 }, { "epoch": 0.19, "learning_rate": 2.477818096687346e-07, "logits/chosen": -3.302093029022217, "logits/rejected": -3.140270709991455, "logps/chosen": -431.5946960449219, "logps/rejected": -240.39224243164062, "loss": 0.5559, "rewards/accuracies": 0.75, "rewards/chosen": 0.08787223696708679, "rewards/margins": 2.0257503986358643, "rewards/rejected": -1.937877893447876, "step": 1618 }, { "epoch": 0.19, "learning_rate": 2.477466931991104e-07, "logits/chosen": -3.3976333141326904, "logits/rejected": -3.103829860687256, "logps/chosen": -361.5479736328125, "logps/rejected": -147.52084350585938, "loss": 0.7588, "rewards/accuracies": 0.625, "rewards/chosen": -0.7584477663040161, "rewards/margins": 0.8833099603652954, "rewards/rejected": -1.641757607460022, "step": 1619 }, { "epoch": 0.19, "learning_rate": 2.4771157672948615e-07, "logits/chosen": -2.461674451828003, "logits/rejected": -2.426726818084717, "logps/chosen": -335.3467712402344, "logps/rejected": -316.70782470703125, "loss": 0.3147, "rewards/accuracies": 0.875, "rewards/chosen": 0.4429143965244293, "rewards/margins": 1.821640968322754, "rewards/rejected": -1.3787267208099365, "step": 1620 }, { "epoch": 0.19, "learning_rate": 2.4767646025986185e-07, "logits/chosen": -3.7233800888061523, "logits/rejected": -3.445781707763672, "logps/chosen": -326.6361389160156, "logps/rejected": -241.48277282714844, "loss": 0.2796, "rewards/accuracies": 1.0, "rewards/chosen": -0.06145267188549042, "rewards/margins": 1.5817797183990479, "rewards/rejected": -1.6432322263717651, "step": 1621 }, { "epoch": 0.19, "learning_rate": 2.476413437902376e-07, "logits/chosen": -3.1440072059631348, "logits/rejected": -2.9725191593170166, "logps/chosen": -344.57000732421875, "logps/rejected": -251.28086853027344, "loss": 0.3917, "rewards/accuracies": 0.875, "rewards/chosen": -0.05121784657239914, "rewards/margins": 0.8979679346084595, "rewards/rejected": -0.9491857886314392, "step": 1622 }, { "epoch": 0.19, "learning_rate": 2.4760622732061336e-07, "logits/chosen": -3.5760037899017334, "logits/rejected": -3.423732280731201, "logps/chosen": -229.76626586914062, "logps/rejected": -239.6583251953125, "loss": 0.2293, "rewards/accuracies": 0.875, "rewards/chosen": 0.24821406602859497, "rewards/margins": 2.1366474628448486, "rewards/rejected": -1.8884334564208984, "step": 1623 }, { "epoch": 0.19, "learning_rate": 2.475711108509891e-07, "logits/chosen": -3.2058005332946777, "logits/rejected": -3.03995943069458, "logps/chosen": -243.10418701171875, "logps/rejected": -143.76434326171875, "loss": 0.9873, "rewards/accuracies": 0.625, "rewards/chosen": -0.8367437124252319, "rewards/margins": 0.39725565910339355, "rewards/rejected": -1.2339993715286255, "step": 1624 }, { "epoch": 0.19, "learning_rate": 2.4753599438136486e-07, "logits/chosen": -3.5770716667175293, "logits/rejected": -3.1531360149383545, "logps/chosen": -275.5059814453125, "logps/rejected": -303.7064208984375, "loss": 0.1833, "rewards/accuracies": 1.0, "rewards/chosen": 0.35189539194107056, "rewards/margins": 2.042738914489746, "rewards/rejected": -1.6908434629440308, "step": 1625 }, { "epoch": 0.19, "learning_rate": 2.4750087791174056e-07, "logits/chosen": -3.15144681930542, "logits/rejected": -3.310819149017334, "logps/chosen": -241.8475341796875, "logps/rejected": -401.38739013671875, "loss": 0.4702, "rewards/accuracies": 0.75, "rewards/chosen": -0.6157566905021667, "rewards/margins": 0.8701713681221008, "rewards/rejected": -1.4859280586242676, "step": 1626 }, { "epoch": 0.19, "learning_rate": 2.474657614421163e-07, "logits/chosen": -2.487565517425537, "logits/rejected": -2.5232656002044678, "logps/chosen": -169.6397705078125, "logps/rejected": -403.33953857421875, "loss": 0.4673, "rewards/accuracies": 0.875, "rewards/chosen": 0.20190362632274628, "rewards/margins": 0.8333655595779419, "rewards/rejected": -0.6314619183540344, "step": 1627 }, { "epoch": 0.19, "learning_rate": 2.4743064497249207e-07, "logits/chosen": -3.3822743892669678, "logits/rejected": -3.4034886360168457, "logps/chosen": -266.6377258300781, "logps/rejected": -218.320556640625, "loss": 0.3814, "rewards/accuracies": 0.75, "rewards/chosen": 0.02176293544471264, "rewards/margins": 1.3964648246765137, "rewards/rejected": -1.374701976776123, "step": 1628 }, { "epoch": 0.19, "learning_rate": 2.473955285028678e-07, "logits/chosen": -2.823197603225708, "logits/rejected": -2.691889762878418, "logps/chosen": -130.83242797851562, "logps/rejected": -304.0140380859375, "loss": 0.309, "rewards/accuracies": 0.875, "rewards/chosen": -0.12175226211547852, "rewards/margins": 1.6047595739364624, "rewards/rejected": -1.7265119552612305, "step": 1629 }, { "epoch": 0.19, "learning_rate": 2.473604120332436e-07, "logits/chosen": -3.1767563819885254, "logits/rejected": -2.942979335784912, "logps/chosen": -255.70855712890625, "logps/rejected": -243.71224975585938, "loss": 0.3013, "rewards/accuracies": 0.875, "rewards/chosen": 0.18955880403518677, "rewards/margins": 2.641592025756836, "rewards/rejected": -2.452033281326294, "step": 1630 }, { "epoch": 0.19, "learning_rate": 2.4732529556361933e-07, "logits/chosen": -2.874816417694092, "logits/rejected": -3.161055088043213, "logps/chosen": -351.501953125, "logps/rejected": -309.83587646484375, "loss": 0.3744, "rewards/accuracies": 0.625, "rewards/chosen": 0.5034058690071106, "rewards/margins": 1.476503610610962, "rewards/rejected": -0.9730978012084961, "step": 1631 }, { "epoch": 0.19, "learning_rate": 2.472901790939951e-07, "logits/chosen": -2.97882080078125, "logits/rejected": -2.8736541271209717, "logps/chosen": -361.94427490234375, "logps/rejected": -316.7669372558594, "loss": 0.3838, "rewards/accuracies": 1.0, "rewards/chosen": -0.3193373680114746, "rewards/margins": 0.9276111125946045, "rewards/rejected": -1.246948480606079, "step": 1632 }, { "epoch": 0.19, "learning_rate": 2.4725506262437084e-07, "logits/chosen": -3.559875011444092, "logits/rejected": -2.8241186141967773, "logps/chosen": -384.7473449707031, "logps/rejected": -192.31300354003906, "loss": 1.1148, "rewards/accuracies": 0.5, "rewards/chosen": -1.364538550376892, "rewards/margins": -0.20309188961982727, "rewards/rejected": -1.1614465713500977, "step": 1633 }, { "epoch": 0.19, "learning_rate": 2.4721994615474654e-07, "logits/chosen": -3.332775115966797, "logits/rejected": -3.0801808834075928, "logps/chosen": -124.4903793334961, "logps/rejected": -156.7494354248047, "loss": 0.376, "rewards/accuracies": 0.875, "rewards/chosen": 0.1827080249786377, "rewards/margins": 1.346949815750122, "rewards/rejected": -1.1642416715621948, "step": 1634 }, { "epoch": 0.19, "learning_rate": 2.471848296851223e-07, "logits/chosen": -3.0425779819488525, "logits/rejected": -3.290282726287842, "logps/chosen": -132.42999267578125, "logps/rejected": -190.01869201660156, "loss": 0.3994, "rewards/accuracies": 0.875, "rewards/chosen": -0.3373163342475891, "rewards/margins": 1.3747307062149048, "rewards/rejected": -1.7120471000671387, "step": 1635 }, { "epoch": 0.19, "learning_rate": 2.4714971321549805e-07, "logits/chosen": -2.9455347061157227, "logits/rejected": -3.003751516342163, "logps/chosen": -214.91864013671875, "logps/rejected": -134.51930236816406, "loss": 0.4332, "rewards/accuracies": 0.875, "rewards/chosen": 0.111922986805439, "rewards/margins": 0.8513867259025574, "rewards/rejected": -0.7394638061523438, "step": 1636 }, { "epoch": 0.19, "learning_rate": 2.471145967458738e-07, "logits/chosen": -2.219520330429077, "logits/rejected": -2.2625210285186768, "logps/chosen": -683.4371337890625, "logps/rejected": -396.0511474609375, "loss": 0.3544, "rewards/accuracies": 0.875, "rewards/chosen": 0.44750234484672546, "rewards/margins": 1.6360344886779785, "rewards/rejected": -1.1885322332382202, "step": 1637 }, { "epoch": 0.19, "learning_rate": 2.4707948027624956e-07, "logits/chosen": -2.7788584232330322, "logits/rejected": -2.84614896774292, "logps/chosen": -158.1094207763672, "logps/rejected": -207.4463653564453, "loss": 0.3494, "rewards/accuracies": 0.75, "rewards/chosen": 0.20430941879749298, "rewards/margins": 1.6551119089126587, "rewards/rejected": -1.4508025646209717, "step": 1638 }, { "epoch": 0.19, "learning_rate": 2.470443638066253e-07, "logits/chosen": -3.3876399993896484, "logits/rejected": -3.3464629650115967, "logps/chosen": -280.7225036621094, "logps/rejected": -251.35427856445312, "loss": 0.3093, "rewards/accuracies": 0.875, "rewards/chosen": 0.08294618874788284, "rewards/margins": 1.8079230785369873, "rewards/rejected": -1.7249767780303955, "step": 1639 }, { "epoch": 0.19, "learning_rate": 2.47009247337001e-07, "logits/chosen": -3.1355056762695312, "logits/rejected": -2.62129545211792, "logps/chosen": -331.9334411621094, "logps/rejected": -186.84597778320312, "loss": 0.3948, "rewards/accuracies": 0.75, "rewards/chosen": 0.15465782582759857, "rewards/margins": 1.5128577947616577, "rewards/rejected": -1.3581998348236084, "step": 1640 }, { "epoch": 0.19, "learning_rate": 2.469741308673768e-07, "logits/chosen": -3.623434066772461, "logits/rejected": -3.3550500869750977, "logps/chosen": -249.16636657714844, "logps/rejected": -232.155517578125, "loss": 0.328, "rewards/accuracies": 0.75, "rewards/chosen": -0.09672051668167114, "rewards/margins": 1.829206943511963, "rewards/rejected": -1.9259274005889893, "step": 1641 }, { "epoch": 0.19, "learning_rate": 2.469390143977525e-07, "logits/chosen": -3.4303834438323975, "logits/rejected": -3.0111262798309326, "logps/chosen": -236.13787841796875, "logps/rejected": -266.9262390136719, "loss": 0.8809, "rewards/accuracies": 0.375, "rewards/chosen": -0.5209572315216064, "rewards/margins": -0.024635910987854004, "rewards/rejected": -0.4963212311267853, "step": 1642 }, { "epoch": 0.19, "learning_rate": 2.469038979281283e-07, "logits/chosen": -2.565387725830078, "logits/rejected": -2.8425614833831787, "logps/chosen": -409.16015625, "logps/rejected": -297.3311462402344, "loss": 0.5821, "rewards/accuracies": 0.625, "rewards/chosen": -0.21906308829784393, "rewards/margins": 0.6376460790634155, "rewards/rejected": -0.8567091822624207, "step": 1643 }, { "epoch": 0.19, "learning_rate": 2.4686878145850403e-07, "logits/chosen": -3.0870227813720703, "logits/rejected": -2.839993476867676, "logps/chosen": -328.0035400390625, "logps/rejected": -280.96343994140625, "loss": 0.8185, "rewards/accuracies": 0.375, "rewards/chosen": -0.7000449299812317, "rewards/margins": -0.14338122308254242, "rewards/rejected": -0.5566637516021729, "step": 1644 }, { "epoch": 0.19, "learning_rate": 2.468336649888798e-07, "logits/chosen": -2.892493724822998, "logits/rejected": -3.0386791229248047, "logps/chosen": -384.6796875, "logps/rejected": -214.46194458007812, "loss": 0.5413, "rewards/accuracies": 0.625, "rewards/chosen": -0.2288389652967453, "rewards/margins": 0.7741386890411377, "rewards/rejected": -1.002977728843689, "step": 1645 }, { "epoch": 0.19, "learning_rate": 2.4679854851925554e-07, "logits/chosen": -3.484879970550537, "logits/rejected": -3.3117287158966064, "logps/chosen": -94.59874725341797, "logps/rejected": -121.7508544921875, "loss": 0.5459, "rewards/accuracies": 0.625, "rewards/chosen": -0.32910534739494324, "rewards/margins": 0.854915976524353, "rewards/rejected": -1.184021234512329, "step": 1646 }, { "epoch": 0.19, "learning_rate": 2.467634320496313e-07, "logits/chosen": -2.977233409881592, "logits/rejected": -2.946171283721924, "logps/chosen": -269.04193115234375, "logps/rejected": -274.8897705078125, "loss": 0.5668, "rewards/accuracies": 0.5, "rewards/chosen": 0.020627934485673904, "rewards/margins": 0.8076647520065308, "rewards/rejected": -0.7870368361473083, "step": 1647 }, { "epoch": 0.19, "learning_rate": 2.46728315580007e-07, "logits/chosen": -2.4336202144622803, "logits/rejected": -2.4982075691223145, "logps/chosen": -405.12652587890625, "logps/rejected": -240.6546173095703, "loss": 0.5605, "rewards/accuracies": 0.75, "rewards/chosen": 0.09702956676483154, "rewards/margins": 0.6522217392921448, "rewards/rejected": -0.555192232131958, "step": 1648 }, { "epoch": 0.19, "learning_rate": 2.4669319911038274e-07, "logits/chosen": -2.708063840866089, "logits/rejected": -2.780207633972168, "logps/chosen": -476.9808349609375, "logps/rejected": -361.608154296875, "loss": 0.3275, "rewards/accuracies": 0.75, "rewards/chosen": 0.2493128478527069, "rewards/margins": 2.3930797576904297, "rewards/rejected": -2.1437668800354004, "step": 1649 }, { "epoch": 0.19, "learning_rate": 2.466580826407585e-07, "logits/chosen": -3.2944164276123047, "logits/rejected": -2.7720751762390137, "logps/chosen": -247.1857452392578, "logps/rejected": -195.83348083496094, "loss": 0.5702, "rewards/accuracies": 0.625, "rewards/chosen": -0.16384786367416382, "rewards/margins": 0.4309402406215668, "rewards/rejected": -0.594788134098053, "step": 1650 }, { "epoch": 0.19, "learning_rate": 2.4662296617113425e-07, "logits/chosen": -3.4813716411590576, "logits/rejected": -3.4087066650390625, "logps/chosen": -381.19873046875, "logps/rejected": -309.81585693359375, "loss": 0.376, "rewards/accuracies": 0.875, "rewards/chosen": -0.01619626022875309, "rewards/margins": 1.618360996246338, "rewards/rejected": -1.6345571279525757, "step": 1651 }, { "epoch": 0.19, "learning_rate": 2.4658784970151e-07, "logits/chosen": -2.406052827835083, "logits/rejected": -2.5095372200012207, "logps/chosen": -281.9344482421875, "logps/rejected": -227.5969696044922, "loss": 0.5148, "rewards/accuracies": 0.75, "rewards/chosen": 0.1824236363172531, "rewards/margins": 0.706309974193573, "rewards/rejected": -0.5238863229751587, "step": 1652 }, { "epoch": 0.19, "learning_rate": 2.4655273323188576e-07, "logits/chosen": -3.4611401557922363, "logits/rejected": -3.5146594047546387, "logps/chosen": -179.00900268554688, "logps/rejected": -212.01885986328125, "loss": 0.5409, "rewards/accuracies": 0.625, "rewards/chosen": -0.22202692925930023, "rewards/margins": 1.269743800163269, "rewards/rejected": -1.4917707443237305, "step": 1653 }, { "epoch": 0.19, "learning_rate": 2.465176167622615e-07, "logits/chosen": -2.8392858505249023, "logits/rejected": -3.058464765548706, "logps/chosen": -178.03025817871094, "logps/rejected": -181.70912170410156, "loss": 0.4875, "rewards/accuracies": 0.625, "rewards/chosen": -0.15759813785552979, "rewards/margins": 1.230353832244873, "rewards/rejected": -1.3879520893096924, "step": 1654 }, { "epoch": 0.19, "learning_rate": 2.4648250029263727e-07, "logits/chosen": -3.1389269828796387, "logits/rejected": -3.3271584510803223, "logps/chosen": -234.2293701171875, "logps/rejected": -287.00726318359375, "loss": 0.3277, "rewards/accuracies": 0.875, "rewards/chosen": 0.035910964012145996, "rewards/margins": 1.3347375392913818, "rewards/rejected": -1.2988265752792358, "step": 1655 }, { "epoch": 0.19, "learning_rate": 2.4644738382301297e-07, "logits/chosen": -2.915689706802368, "logits/rejected": -2.8571135997772217, "logps/chosen": -208.79710388183594, "logps/rejected": -190.15957641601562, "loss": 0.7201, "rewards/accuracies": 0.625, "rewards/chosen": -0.18519210815429688, "rewards/margins": 0.19241377711296082, "rewards/rejected": -0.3776058554649353, "step": 1656 }, { "epoch": 0.19, "learning_rate": 2.464122673533887e-07, "logits/chosen": -3.790785312652588, "logits/rejected": -3.59560489654541, "logps/chosen": -170.98670959472656, "logps/rejected": -183.72364807128906, "loss": 0.3532, "rewards/accuracies": 0.875, "rewards/chosen": 0.1265781819820404, "rewards/margins": 1.4742681980133057, "rewards/rejected": -1.3476901054382324, "step": 1657 }, { "epoch": 0.19, "learning_rate": 2.463771508837645e-07, "logits/chosen": -3.4610581398010254, "logits/rejected": -3.166311502456665, "logps/chosen": -238.10324096679688, "logps/rejected": -184.92642211914062, "loss": 0.3836, "rewards/accuracies": 0.875, "rewards/chosen": -0.35447806119918823, "rewards/margins": 0.9791409969329834, "rewards/rejected": -1.3336191177368164, "step": 1658 }, { "epoch": 0.19, "learning_rate": 2.4634203441414023e-07, "logits/chosen": -3.5480051040649414, "logits/rejected": -3.7581186294555664, "logps/chosen": -177.24977111816406, "logps/rejected": -183.33998107910156, "loss": 0.2825, "rewards/accuracies": 1.0, "rewards/chosen": 0.17221404612064362, "rewards/margins": 1.644298791885376, "rewards/rejected": -1.4720847606658936, "step": 1659 }, { "epoch": 0.19, "learning_rate": 2.46306917944516e-07, "logits/chosen": -3.974099636077881, "logits/rejected": -3.930417537689209, "logps/chosen": -110.23602294921875, "logps/rejected": -145.34014892578125, "loss": 0.5658, "rewards/accuracies": 0.625, "rewards/chosen": 0.3136996626853943, "rewards/margins": 1.275432825088501, "rewards/rejected": -0.9617331624031067, "step": 1660 }, { "epoch": 0.19, "learning_rate": 2.462718014748917e-07, "logits/chosen": -2.651881694793701, "logits/rejected": -2.5829646587371826, "logps/chosen": -360.2419738769531, "logps/rejected": -173.4150390625, "loss": 0.6656, "rewards/accuracies": 0.625, "rewards/chosen": -0.13943196833133698, "rewards/margins": 0.27662673592567444, "rewards/rejected": -0.4160586893558502, "step": 1661 }, { "epoch": 0.19, "learning_rate": 2.4623668500526744e-07, "logits/chosen": -3.396226167678833, "logits/rejected": -3.121123790740967, "logps/chosen": -244.54550170898438, "logps/rejected": -232.8888702392578, "loss": 0.5882, "rewards/accuracies": 0.625, "rewards/chosen": 0.10487784445285797, "rewards/margins": 0.7730016708374023, "rewards/rejected": -0.6681237816810608, "step": 1662 }, { "epoch": 0.19, "learning_rate": 2.462015685356432e-07, "logits/chosen": -3.088339328765869, "logits/rejected": -3.0391621589660645, "logps/chosen": -164.5384979248047, "logps/rejected": -203.97634887695312, "loss": 0.4186, "rewards/accuracies": 0.875, "rewards/chosen": -0.3430846035480499, "rewards/margins": 1.0082159042358398, "rewards/rejected": -1.3513004779815674, "step": 1663 }, { "epoch": 0.19, "learning_rate": 2.4616645206601895e-07, "logits/chosen": -2.111482858657837, "logits/rejected": -2.158494472503662, "logps/chosen": -326.66357421875, "logps/rejected": -313.67120361328125, "loss": 0.2229, "rewards/accuracies": 1.0, "rewards/chosen": 0.2633962333202362, "rewards/margins": 1.6975619792938232, "rewards/rejected": -1.4341658353805542, "step": 1664 }, { "epoch": 0.19, "learning_rate": 2.461313355963947e-07, "logits/chosen": -3.597100019454956, "logits/rejected": -3.314906120300293, "logps/chosen": -319.1592102050781, "logps/rejected": -219.0665283203125, "loss": 0.4606, "rewards/accuracies": 0.75, "rewards/chosen": 0.4545048773288727, "rewards/margins": 1.2645498514175415, "rewards/rejected": -0.8100448250770569, "step": 1665 }, { "epoch": 0.19, "learning_rate": 2.4609621912677045e-07, "logits/chosen": -2.8511533737182617, "logits/rejected": -2.468804121017456, "logps/chosen": -193.9290008544922, "logps/rejected": -281.9854431152344, "loss": 0.3209, "rewards/accuracies": 0.75, "rewards/chosen": 0.08376835286617279, "rewards/margins": 1.9418946504592896, "rewards/rejected": -1.8581262826919556, "step": 1666 }, { "epoch": 0.19, "learning_rate": 2.460611026571462e-07, "logits/chosen": -3.6707606315612793, "logits/rejected": -3.9513099193573, "logps/chosen": -247.72445678710938, "logps/rejected": -239.5735626220703, "loss": 0.2644, "rewards/accuracies": 0.875, "rewards/chosen": 0.054384276270866394, "rewards/margins": 1.82422935962677, "rewards/rejected": -1.7698450088500977, "step": 1667 }, { "epoch": 0.19, "learning_rate": 2.4602598618752196e-07, "logits/chosen": -2.993516683578491, "logits/rejected": -3.0110347270965576, "logps/chosen": -246.90931701660156, "logps/rejected": -226.88943481445312, "loss": 0.3784, "rewards/accuracies": 1.0, "rewards/chosen": 0.09634879231452942, "rewards/margins": 0.9972802400588989, "rewards/rejected": -0.9009314775466919, "step": 1668 }, { "epoch": 0.19, "learning_rate": 2.4599086971789766e-07, "logits/chosen": -2.969608783721924, "logits/rejected": -2.9063379764556885, "logps/chosen": -429.904541015625, "logps/rejected": -353.58367919921875, "loss": 0.523, "rewards/accuracies": 0.5, "rewards/chosen": 0.24493397772312164, "rewards/margins": 0.780466616153717, "rewards/rejected": -0.5355326533317566, "step": 1669 }, { "epoch": 0.19, "learning_rate": 2.459557532482734e-07, "logits/chosen": -3.3365840911865234, "logits/rejected": -2.877293348312378, "logps/chosen": -253.65997314453125, "logps/rejected": -122.17088317871094, "loss": 0.395, "rewards/accuracies": 0.875, "rewards/chosen": 0.2868715524673462, "rewards/margins": 1.0084301233291626, "rewards/rejected": -0.7215585708618164, "step": 1670 }, { "epoch": 0.19, "learning_rate": 2.4592063677864917e-07, "logits/chosen": -3.4404802322387695, "logits/rejected": -2.9636712074279785, "logps/chosen": -287.96759033203125, "logps/rejected": -350.8348083496094, "loss": 0.2991, "rewards/accuracies": 0.75, "rewards/chosen": 0.31366777420043945, "rewards/margins": 1.5818394422531128, "rewards/rejected": -1.268171787261963, "step": 1671 }, { "epoch": 0.19, "learning_rate": 2.458855203090249e-07, "logits/chosen": -2.742400884628296, "logits/rejected": -2.623107433319092, "logps/chosen": -309.1057434082031, "logps/rejected": -261.9565124511719, "loss": 0.3543, "rewards/accuracies": 0.875, "rewards/chosen": -0.03688700497150421, "rewards/margins": 1.1435816287994385, "rewards/rejected": -1.1804687976837158, "step": 1672 }, { "epoch": 0.19, "learning_rate": 2.458504038394007e-07, "logits/chosen": -3.473637819290161, "logits/rejected": -3.3117377758026123, "logps/chosen": -359.1307678222656, "logps/rejected": -293.23370361328125, "loss": 0.3229, "rewards/accuracies": 0.875, "rewards/chosen": -0.12932097911834717, "rewards/margins": 1.6799559593200684, "rewards/rejected": -1.8092769384384155, "step": 1673 }, { "epoch": 0.19, "learning_rate": 2.458152873697764e-07, "logits/chosen": -2.7435302734375, "logits/rejected": -2.567420721054077, "logps/chosen": -303.9747009277344, "logps/rejected": -349.2685241699219, "loss": 0.2237, "rewards/accuracies": 1.0, "rewards/chosen": 0.05257215350866318, "rewards/margins": 2.1518969535827637, "rewards/rejected": -2.0993247032165527, "step": 1674 }, { "epoch": 0.19, "learning_rate": 2.457801709001522e-07, "logits/chosen": -3.03609561920166, "logits/rejected": -3.044161558151245, "logps/chosen": -300.92315673828125, "logps/rejected": -237.2432861328125, "loss": 0.5763, "rewards/accuracies": 0.625, "rewards/chosen": -0.5415917038917542, "rewards/margins": 0.6007184982299805, "rewards/rejected": -1.1423102617263794, "step": 1675 }, { "epoch": 0.19, "learning_rate": 2.4574505443052794e-07, "logits/chosen": -3.8496711254119873, "logits/rejected": -3.8292016983032227, "logps/chosen": -229.2249755859375, "logps/rejected": -220.6937255859375, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": 0.7747334241867065, "rewards/margins": 2.4123823642730713, "rewards/rejected": -1.6376488208770752, "step": 1676 }, { "epoch": 0.19, "learning_rate": 2.4570993796090364e-07, "logits/chosen": -3.068362236022949, "logits/rejected": -3.0622189044952393, "logps/chosen": -218.0693359375, "logps/rejected": -265.5788269042969, "loss": 0.461, "rewards/accuracies": 0.75, "rewards/chosen": 0.35873308777809143, "rewards/margins": 1.80496346950531, "rewards/rejected": -1.446230411529541, "step": 1677 }, { "epoch": 0.19, "learning_rate": 2.456748214912794e-07, "logits/chosen": -3.4159510135650635, "logits/rejected": -3.345960855484009, "logps/chosen": -264.47808837890625, "logps/rejected": -382.75909423828125, "loss": 0.584, "rewards/accuracies": 0.75, "rewards/chosen": 0.06448528170585632, "rewards/margins": 1.444566011428833, "rewards/rejected": -1.3800805807113647, "step": 1678 }, { "epoch": 0.19, "learning_rate": 2.4563970502165515e-07, "logits/chosen": -2.7105515003204346, "logits/rejected": -2.5486345291137695, "logps/chosen": -242.34515380859375, "logps/rejected": -214.1011505126953, "loss": 0.7554, "rewards/accuracies": 0.375, "rewards/chosen": -0.8463466763496399, "rewards/margins": -0.007047310471534729, "rewards/rejected": -0.8392993807792664, "step": 1679 }, { "epoch": 0.19, "learning_rate": 2.456045885520309e-07, "logits/chosen": -3.4149301052093506, "logits/rejected": -3.4928789138793945, "logps/chosen": -435.8739013671875, "logps/rejected": -348.6937255859375, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": -0.26740455627441406, "rewards/margins": 1.4628195762634277, "rewards/rejected": -1.7302241325378418, "step": 1680 }, { "epoch": 0.19, "learning_rate": 2.4556947208240666e-07, "logits/chosen": -3.772813558578491, "logits/rejected": -3.413177490234375, "logps/chosen": -217.1726837158203, "logps/rejected": -241.602294921875, "loss": 0.3121, "rewards/accuracies": 0.875, "rewards/chosen": 0.10687769949436188, "rewards/margins": 1.5374398231506348, "rewards/rejected": -1.4305620193481445, "step": 1681 }, { "epoch": 0.19, "learning_rate": 2.4553435561278236e-07, "logits/chosen": -2.9016683101654053, "logits/rejected": -3.014634370803833, "logps/chosen": -204.0310516357422, "logps/rejected": -267.7081298828125, "loss": 0.8202, "rewards/accuracies": 0.625, "rewards/chosen": 0.09997561573982239, "rewards/margins": 0.43439704179763794, "rewards/rejected": -0.33442142605781555, "step": 1682 }, { "epoch": 0.19, "learning_rate": 2.454992391431581e-07, "logits/chosen": -3.5605297088623047, "logits/rejected": -3.615485191345215, "logps/chosen": -351.8419494628906, "logps/rejected": -258.89398193359375, "loss": 0.4648, "rewards/accuracies": 0.75, "rewards/chosen": 0.14443156123161316, "rewards/margins": 1.2567256689071655, "rewards/rejected": -1.1122941970825195, "step": 1683 }, { "epoch": 0.19, "learning_rate": 2.4546412267353386e-07, "logits/chosen": -2.4477345943450928, "logits/rejected": -2.362621307373047, "logps/chosen": -360.66473388671875, "logps/rejected": -328.2407531738281, "loss": 0.2151, "rewards/accuracies": 0.875, "rewards/chosen": 0.4126198887825012, "rewards/margins": 2.459287166595459, "rewards/rejected": -2.0466673374176025, "step": 1684 }, { "epoch": 0.19, "learning_rate": 2.454290062039096e-07, "logits/chosen": -2.911421775817871, "logits/rejected": -2.9039158821105957, "logps/chosen": -290.2357482910156, "logps/rejected": -322.86077880859375, "loss": 0.4581, "rewards/accuracies": 0.875, "rewards/chosen": 0.45506545901298523, "rewards/margins": 1.1540563106536865, "rewards/rejected": -0.6989908218383789, "step": 1685 }, { "epoch": 0.19, "learning_rate": 2.4539388973428537e-07, "logits/chosen": -3.6172256469726562, "logits/rejected": -3.466050863265991, "logps/chosen": -274.277587890625, "logps/rejected": -239.30740356445312, "loss": 0.3698, "rewards/accuracies": 0.75, "rewards/chosen": -0.17416085302829742, "rewards/margins": 1.41618013381958, "rewards/rejected": -1.5903409719467163, "step": 1686 }, { "epoch": 0.19, "learning_rate": 2.453587732646611e-07, "logits/chosen": -3.0522522926330566, "logits/rejected": -2.8346526622772217, "logps/chosen": -237.89743041992188, "logps/rejected": -269.82427978515625, "loss": 0.1814, "rewards/accuracies": 1.0, "rewards/chosen": 0.2388889491558075, "rewards/margins": 2.0487163066864014, "rewards/rejected": -1.809827446937561, "step": 1687 }, { "epoch": 0.19, "learning_rate": 2.453236567950369e-07, "logits/chosen": -3.385484457015991, "logits/rejected": -3.126699447631836, "logps/chosen": -373.0326843261719, "logps/rejected": -381.6529541015625, "loss": 0.4801, "rewards/accuracies": 0.875, "rewards/chosen": 0.25851303339004517, "rewards/margins": 0.924397349357605, "rewards/rejected": -0.665884256362915, "step": 1688 }, { "epoch": 0.19, "learning_rate": 2.4528854032541263e-07, "logits/chosen": -2.7377686500549316, "logits/rejected": -3.031897783279419, "logps/chosen": -166.29563903808594, "logps/rejected": -186.349853515625, "loss": 0.3233, "rewards/accuracies": 0.875, "rewards/chosen": 0.19604967534542084, "rewards/margins": 1.9597545862197876, "rewards/rejected": -1.7637050151824951, "step": 1689 }, { "epoch": 0.19, "learning_rate": 2.4525342385578833e-07, "logits/chosen": -2.6053786277770996, "logits/rejected": -2.7166953086853027, "logps/chosen": -226.43463134765625, "logps/rejected": -382.5266418457031, "loss": 0.5322, "rewards/accuracies": 0.75, "rewards/chosen": 0.12152568250894547, "rewards/margins": 1.2979834079742432, "rewards/rejected": -1.1764576435089111, "step": 1690 }, { "epoch": 0.19, "learning_rate": 2.452183073861641e-07, "logits/chosen": -2.9455792903900146, "logits/rejected": -3.1394710540771484, "logps/chosen": -270.2583312988281, "logps/rejected": -280.0306091308594, "loss": 0.287, "rewards/accuracies": 0.875, "rewards/chosen": 0.41307783126831055, "rewards/margins": 2.348109245300293, "rewards/rejected": -1.9350314140319824, "step": 1691 }, { "epoch": 0.2, "learning_rate": 2.4518319091653984e-07, "logits/chosen": -3.4704463481903076, "logits/rejected": -3.4533281326293945, "logps/chosen": -146.36410522460938, "logps/rejected": -296.54022216796875, "loss": 0.3781, "rewards/accuracies": 0.875, "rewards/chosen": -0.3706258535385132, "rewards/margins": 1.2620245218276978, "rewards/rejected": -1.6326501369476318, "step": 1692 }, { "epoch": 0.2, "learning_rate": 2.451480744469156e-07, "logits/chosen": -3.0328855514526367, "logits/rejected": -3.118960380554199, "logps/chosen": -194.57818603515625, "logps/rejected": -155.09303283691406, "loss": 0.5282, "rewards/accuracies": 0.625, "rewards/chosen": 0.05055350065231323, "rewards/margins": 0.6075702905654907, "rewards/rejected": -0.5570167899131775, "step": 1693 }, { "epoch": 0.2, "learning_rate": 2.4511295797729135e-07, "logits/chosen": -3.051926851272583, "logits/rejected": -2.7508809566497803, "logps/chosen": -231.03656005859375, "logps/rejected": -224.9603729248047, "loss": 0.4777, "rewards/accuracies": 0.625, "rewards/chosen": -0.32188910245895386, "rewards/margins": 1.3823513984680176, "rewards/rejected": -1.7042404413223267, "step": 1694 }, { "epoch": 0.2, "learning_rate": 2.4507784150766705e-07, "logits/chosen": -3.5282094478607178, "logits/rejected": -3.548593759536743, "logps/chosen": -113.14512634277344, "logps/rejected": -176.37330627441406, "loss": 0.7163, "rewards/accuracies": 0.5, "rewards/chosen": -0.07644618302583694, "rewards/margins": 0.49860531091690063, "rewards/rejected": -0.5750515460968018, "step": 1695 }, { "epoch": 0.2, "learning_rate": 2.450427250380428e-07, "logits/chosen": -3.172929286956787, "logits/rejected": -3.1298131942749023, "logps/chosen": -335.30352783203125, "logps/rejected": -267.81561279296875, "loss": 0.2931, "rewards/accuracies": 1.0, "rewards/chosen": 0.6214680075645447, "rewards/margins": 1.3313868045806885, "rewards/rejected": -0.7099188566207886, "step": 1696 }, { "epoch": 0.2, "learning_rate": 2.450076085684186e-07, "logits/chosen": -3.5110409259796143, "logits/rejected": -3.172805070877075, "logps/chosen": -215.44664001464844, "logps/rejected": -164.82928466796875, "loss": 0.5507, "rewards/accuracies": 0.625, "rewards/chosen": 0.10024100542068481, "rewards/margins": 1.1242871284484863, "rewards/rejected": -1.0240461826324463, "step": 1697 }, { "epoch": 0.2, "learning_rate": 2.449724920987943e-07, "logits/chosen": -2.1990904808044434, "logits/rejected": -2.3453094959259033, "logps/chosen": -334.96484375, "logps/rejected": -214.06854248046875, "loss": 0.3733, "rewards/accuracies": 0.75, "rewards/chosen": 0.04938840866088867, "rewards/margins": 1.071897268295288, "rewards/rejected": -1.022508978843689, "step": 1698 }, { "epoch": 0.2, "learning_rate": 2.4493737562917007e-07, "logits/chosen": -3.865786075592041, "logits/rejected": -3.9234697818756104, "logps/chosen": -198.1676025390625, "logps/rejected": -221.02821350097656, "loss": 0.4109, "rewards/accuracies": 0.625, "rewards/chosen": -0.20778770744800568, "rewards/margins": 1.049099087715149, "rewards/rejected": -1.2568868398666382, "step": 1699 }, { "epoch": 0.2, "learning_rate": 2.449022591595458e-07, "logits/chosen": -3.484004259109497, "logits/rejected": -3.4127964973449707, "logps/chosen": -383.62982177734375, "logps/rejected": -201.284912109375, "loss": 0.4026, "rewards/accuracies": 0.75, "rewards/chosen": 0.25389915704727173, "rewards/margins": 1.5683817863464355, "rewards/rejected": -1.3144826889038086, "step": 1700 }, { "epoch": 0.2, "learning_rate": 2.448671426899216e-07, "logits/chosen": -3.958258867263794, "logits/rejected": -3.5568466186523438, "logps/chosen": -222.71250915527344, "logps/rejected": -252.0777587890625, "loss": 0.4138, "rewards/accuracies": 0.875, "rewards/chosen": -0.46280574798583984, "rewards/margins": 1.4746692180633545, "rewards/rejected": -1.9374749660491943, "step": 1701 }, { "epoch": 0.2, "learning_rate": 2.4483202622029733e-07, "logits/chosen": -3.428539752960205, "logits/rejected": -3.9102351665496826, "logps/chosen": -199.1168212890625, "logps/rejected": -260.75555419921875, "loss": 0.2859, "rewards/accuracies": 1.0, "rewards/chosen": -0.4801642596721649, "rewards/margins": 2.066012144088745, "rewards/rejected": -2.5461764335632324, "step": 1702 }, { "epoch": 0.2, "learning_rate": 2.4479690975067303e-07, "logits/chosen": -3.283670663833618, "logits/rejected": -2.9816439151763916, "logps/chosen": -335.1788330078125, "logps/rejected": -328.33154296875, "loss": 0.3861, "rewards/accuracies": 0.875, "rewards/chosen": 0.3351360559463501, "rewards/margins": 1.7112128734588623, "rewards/rejected": -1.3760769367218018, "step": 1703 }, { "epoch": 0.2, "learning_rate": 2.447617932810488e-07, "logits/chosen": -3.535792589187622, "logits/rejected": -3.7741522789001465, "logps/chosen": -179.83258056640625, "logps/rejected": -217.40878295898438, "loss": 0.498, "rewards/accuracies": 0.875, "rewards/chosen": -0.30200010538101196, "rewards/margins": 1.3488916158676147, "rewards/rejected": -1.6508917808532715, "step": 1704 }, { "epoch": 0.2, "learning_rate": 2.4472667681142454e-07, "logits/chosen": -3.020387887954712, "logits/rejected": -2.9802629947662354, "logps/chosen": -201.28756713867188, "logps/rejected": -322.14471435546875, "loss": 0.6051, "rewards/accuracies": 0.875, "rewards/chosen": -0.46486616134643555, "rewards/margins": 2.1346938610076904, "rewards/rejected": -2.599560022354126, "step": 1705 }, { "epoch": 0.2, "learning_rate": 2.446915603418003e-07, "logits/chosen": -2.835179328918457, "logits/rejected": -2.9476020336151123, "logps/chosen": -166.61573791503906, "logps/rejected": -185.72549438476562, "loss": 0.1763, "rewards/accuracies": 1.0, "rewards/chosen": 0.581467866897583, "rewards/margins": 2.1834447383880615, "rewards/rejected": -1.6019768714904785, "step": 1706 }, { "epoch": 0.2, "learning_rate": 2.4465644387217604e-07, "logits/chosen": -3.1465251445770264, "logits/rejected": -3.119692325592041, "logps/chosen": -336.3077697753906, "logps/rejected": -339.3916015625, "loss": 0.4005, "rewards/accuracies": 0.75, "rewards/chosen": 0.5570852160453796, "rewards/margins": 1.4699203968048096, "rewards/rejected": -0.9128351807594299, "step": 1707 }, { "epoch": 0.2, "learning_rate": 2.4462132740255175e-07, "logits/chosen": -2.6089582443237305, "logits/rejected": -2.737736940383911, "logps/chosen": -306.18524169921875, "logps/rejected": -177.68496704101562, "loss": 0.4207, "rewards/accuracies": 0.75, "rewards/chosen": -0.39408519864082336, "rewards/margins": 1.044629454612732, "rewards/rejected": -1.4387147426605225, "step": 1708 }, { "epoch": 0.2, "learning_rate": 2.4458621093292755e-07, "logits/chosen": -3.9313507080078125, "logits/rejected": -3.967329502105713, "logps/chosen": -193.96939086914062, "logps/rejected": -202.03741455078125, "loss": 0.5294, "rewards/accuracies": 0.625, "rewards/chosen": -0.7465137243270874, "rewards/margins": 0.8238089680671692, "rewards/rejected": -1.5703226327896118, "step": 1709 }, { "epoch": 0.2, "learning_rate": 2.445510944633033e-07, "logits/chosen": -3.155323028564453, "logits/rejected": -2.9358575344085693, "logps/chosen": -260.6053161621094, "logps/rejected": -324.3551330566406, "loss": 0.5311, "rewards/accuracies": 0.75, "rewards/chosen": -0.2441752403974533, "rewards/margins": 1.0690574645996094, "rewards/rejected": -1.313232660293579, "step": 1710 }, { "epoch": 0.2, "learning_rate": 2.44515977993679e-07, "logits/chosen": -3.089785099029541, "logits/rejected": -2.845656394958496, "logps/chosen": -315.4617919921875, "logps/rejected": -249.2813720703125, "loss": 0.4313, "rewards/accuracies": 0.875, "rewards/chosen": 0.2886127829551697, "rewards/margins": 1.4697961807250977, "rewards/rejected": -1.1811834573745728, "step": 1711 }, { "epoch": 0.2, "learning_rate": 2.4448086152405476e-07, "logits/chosen": -2.4240732192993164, "logits/rejected": -2.5394866466522217, "logps/chosen": -318.2694396972656, "logps/rejected": -220.41360473632812, "loss": 0.7321, "rewards/accuracies": 0.625, "rewards/chosen": 0.08090024441480637, "rewards/margins": 0.4448111355304718, "rewards/rejected": -0.36391085386276245, "step": 1712 }, { "epoch": 0.2, "learning_rate": 2.444457450544305e-07, "logits/chosen": -2.9358439445495605, "logits/rejected": -2.8569648265838623, "logps/chosen": -264.7394104003906, "logps/rejected": -341.2603759765625, "loss": 0.3664, "rewards/accuracies": 1.0, "rewards/chosen": 0.36451125144958496, "rewards/margins": 1.431419849395752, "rewards/rejected": -1.066908597946167, "step": 1713 }, { "epoch": 0.2, "learning_rate": 2.4441062858480627e-07, "logits/chosen": -2.8864569664001465, "logits/rejected": -2.8941972255706787, "logps/chosen": -390.8206481933594, "logps/rejected": -242.85182189941406, "loss": 0.4984, "rewards/accuracies": 0.625, "rewards/chosen": -0.29410824179649353, "rewards/margins": 0.7023544311523438, "rewards/rejected": -0.9964627027511597, "step": 1714 }, { "epoch": 0.2, "learning_rate": 2.44375512115182e-07, "logits/chosen": -3.104605197906494, "logits/rejected": -3.1454367637634277, "logps/chosen": -270.9232177734375, "logps/rejected": -248.38262939453125, "loss": 0.3654, "rewards/accuracies": 0.75, "rewards/chosen": -0.26806002855300903, "rewards/margins": 1.5399221181869507, "rewards/rejected": -1.8079822063446045, "step": 1715 }, { "epoch": 0.2, "learning_rate": 2.443403956455577e-07, "logits/chosen": -2.3233776092529297, "logits/rejected": -2.3871142864227295, "logps/chosen": -317.3779602050781, "logps/rejected": -330.9116516113281, "loss": 0.2406, "rewards/accuracies": 0.875, "rewards/chosen": 0.837905764579773, "rewards/margins": 2.1452691555023193, "rewards/rejected": -1.3073632717132568, "step": 1716 }, { "epoch": 0.2, "learning_rate": 2.443052791759335e-07, "logits/chosen": -2.879929542541504, "logits/rejected": -2.6622700691223145, "logps/chosen": -258.1183776855469, "logps/rejected": -275.150146484375, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": 0.16815334558486938, "rewards/margins": 1.212031364440918, "rewards/rejected": -1.0438780784606934, "step": 1717 }, { "epoch": 0.2, "learning_rate": 2.4427016270630923e-07, "logits/chosen": -2.9937429428100586, "logits/rejected": -2.935084819793701, "logps/chosen": -116.81980895996094, "logps/rejected": -149.3265380859375, "loss": 0.4749, "rewards/accuracies": 0.875, "rewards/chosen": -0.32410258054733276, "rewards/margins": 0.6127547025680542, "rewards/rejected": -0.9368573427200317, "step": 1718 }, { "epoch": 0.2, "learning_rate": 2.44235046236685e-07, "logits/chosen": -2.908172369003296, "logits/rejected": -2.993743419647217, "logps/chosen": -167.93698120117188, "logps/rejected": -131.21994018554688, "loss": 0.438, "rewards/accuracies": 0.875, "rewards/chosen": -0.24395276606082916, "rewards/margins": 0.6851309537887573, "rewards/rejected": -0.9290837049484253, "step": 1719 }, { "epoch": 0.2, "learning_rate": 2.4419992976706074e-07, "logits/chosen": -2.7098655700683594, "logits/rejected": -2.7113876342773438, "logps/chosen": -374.839111328125, "logps/rejected": -212.05490112304688, "loss": 0.7125, "rewards/accuracies": 0.625, "rewards/chosen": -0.7810652256011963, "rewards/margins": 0.5353448987007141, "rewards/rejected": -1.3164100646972656, "step": 1720 }, { "epoch": 0.2, "learning_rate": 2.441648132974365e-07, "logits/chosen": -3.228445529937744, "logits/rejected": -3.542778491973877, "logps/chosen": -333.4078063964844, "logps/rejected": -213.998046875, "loss": 0.378, "rewards/accuracies": 0.875, "rewards/chosen": -0.22177620232105255, "rewards/margins": 1.0722817182540894, "rewards/rejected": -1.294057846069336, "step": 1721 }, { "epoch": 0.2, "learning_rate": 2.4412969682781225e-07, "logits/chosen": -2.91353702545166, "logits/rejected": -2.7158563137054443, "logps/chosen": -313.9520263671875, "logps/rejected": -291.3470458984375, "loss": 0.3472, "rewards/accuracies": 0.875, "rewards/chosen": -0.30379754304885864, "rewards/margins": 1.3417613506317139, "rewards/rejected": -1.6455589532852173, "step": 1722 }, { "epoch": 0.2, "learning_rate": 2.44094580358188e-07, "logits/chosen": -3.5439860820770264, "logits/rejected": -3.260101318359375, "logps/chosen": -221.77609252929688, "logps/rejected": -125.55340576171875, "loss": 0.4755, "rewards/accuracies": 0.75, "rewards/chosen": 0.20629605650901794, "rewards/margins": 0.8871363401412964, "rewards/rejected": -0.680840253829956, "step": 1723 }, { "epoch": 0.2, "learning_rate": 2.440594638885637e-07, "logits/chosen": -3.0384671688079834, "logits/rejected": -3.3780710697174072, "logps/chosen": -154.1116485595703, "logps/rejected": -139.92042541503906, "loss": 0.5778, "rewards/accuracies": 0.625, "rewards/chosen": 0.0461917370557785, "rewards/margins": 1.5937776565551758, "rewards/rejected": -1.5475858449935913, "step": 1724 }, { "epoch": 0.2, "learning_rate": 2.4402434741893945e-07, "logits/chosen": -3.8238131999969482, "logits/rejected": -3.6845932006835938, "logps/chosen": -184.49453735351562, "logps/rejected": -186.496826171875, "loss": 0.2961, "rewards/accuracies": 0.875, "rewards/chosen": 0.544116199016571, "rewards/margins": 2.154240608215332, "rewards/rejected": -1.6101243495941162, "step": 1725 }, { "epoch": 0.2, "learning_rate": 2.439892309493152e-07, "logits/chosen": -3.196857452392578, "logits/rejected": -2.6698999404907227, "logps/chosen": -425.947509765625, "logps/rejected": -417.09698486328125, "loss": 0.1993, "rewards/accuracies": 1.0, "rewards/chosen": 0.38281041383743286, "rewards/margins": 1.9237964153289795, "rewards/rejected": -1.5409860610961914, "step": 1726 }, { "epoch": 0.2, "learning_rate": 2.4395411447969096e-07, "logits/chosen": -2.7583677768707275, "logits/rejected": -2.70349383354187, "logps/chosen": -247.20159912109375, "logps/rejected": -234.02919006347656, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": 0.05214317888021469, "rewards/margins": 1.598447322845459, "rewards/rejected": -1.5463039875030518, "step": 1727 }, { "epoch": 0.2, "learning_rate": 2.439189980100667e-07, "logits/chosen": -2.710408926010132, "logits/rejected": -2.939810037612915, "logps/chosen": -278.5091247558594, "logps/rejected": -244.74429321289062, "loss": 0.4702, "rewards/accuracies": 0.75, "rewards/chosen": -0.04341694712638855, "rewards/margins": 1.023653268814087, "rewards/rejected": -1.0670702457427979, "step": 1728 }, { "epoch": 0.2, "learning_rate": 2.4388388154044247e-07, "logits/chosen": -3.2340686321258545, "logits/rejected": -2.949521541595459, "logps/chosen": -463.640625, "logps/rejected": -356.5784912109375, "loss": 0.2296, "rewards/accuracies": 1.0, "rewards/chosen": 0.4406130015850067, "rewards/margins": 2.4163620471954346, "rewards/rejected": -1.975749135017395, "step": 1729 }, { "epoch": 0.2, "learning_rate": 2.4384876507081817e-07, "logits/chosen": -3.5759692192077637, "logits/rejected": -3.338381052017212, "logps/chosen": -153.74571228027344, "logps/rejected": -142.6878204345703, "loss": 0.5895, "rewards/accuracies": 0.75, "rewards/chosen": -0.4222008287906647, "rewards/margins": 0.6961211562156677, "rewards/rejected": -1.1183220148086548, "step": 1730 }, { "epoch": 0.2, "learning_rate": 2.43813648601194e-07, "logits/chosen": -3.119556188583374, "logits/rejected": -3.105013608932495, "logps/chosen": -300.7023010253906, "logps/rejected": -431.5294494628906, "loss": 0.4579, "rewards/accuracies": 0.625, "rewards/chosen": -0.029314160346984863, "rewards/margins": 1.0032093524932861, "rewards/rejected": -1.0325233936309814, "step": 1731 }, { "epoch": 0.2, "learning_rate": 2.437785321315697e-07, "logits/chosen": -2.997035026550293, "logits/rejected": -2.7745275497436523, "logps/chosen": -202.52383422851562, "logps/rejected": -222.5989227294922, "loss": 0.1695, "rewards/accuracies": 1.0, "rewards/chosen": 0.06041629612445831, "rewards/margins": 2.066164255142212, "rewards/rejected": -2.0057480335235596, "step": 1732 }, { "epoch": 0.2, "learning_rate": 2.4374341566194543e-07, "logits/chosen": -3.2654011249542236, "logits/rejected": -3.137331962585449, "logps/chosen": -197.5450439453125, "logps/rejected": -220.6737060546875, "loss": 0.3962, "rewards/accuracies": 0.875, "rewards/chosen": -0.37919241189956665, "rewards/margins": 1.2437317371368408, "rewards/rejected": -1.6229242086410522, "step": 1733 }, { "epoch": 0.2, "learning_rate": 2.437082991923212e-07, "logits/chosen": -3.7312777042388916, "logits/rejected": -3.562410354614258, "logps/chosen": -129.6446533203125, "logps/rejected": -143.5083770751953, "loss": 0.8129, "rewards/accuracies": 0.5, "rewards/chosen": -0.3476855754852295, "rewards/margins": 0.4746500253677368, "rewards/rejected": -0.8223356008529663, "step": 1734 }, { "epoch": 0.2, "learning_rate": 2.4367318272269694e-07, "logits/chosen": -3.0206260681152344, "logits/rejected": -3.1526198387145996, "logps/chosen": -281.50970458984375, "logps/rejected": -304.1826171875, "loss": 0.1389, "rewards/accuracies": 1.0, "rewards/chosen": -0.17513632774353027, "rewards/margins": 2.536421775817871, "rewards/rejected": -2.7115581035614014, "step": 1735 }, { "epoch": 0.2, "learning_rate": 2.436380662530727e-07, "logits/chosen": -3.179959297180176, "logits/rejected": -3.2266836166381836, "logps/chosen": -245.88204956054688, "logps/rejected": -242.5856475830078, "loss": 0.527, "rewards/accuracies": 0.875, "rewards/chosen": -0.14690721035003662, "rewards/margins": 1.217486023902893, "rewards/rejected": -1.3643931150436401, "step": 1736 }, { "epoch": 0.2, "learning_rate": 2.4360294978344845e-07, "logits/chosen": -3.715092420578003, "logits/rejected": -3.394807815551758, "logps/chosen": -283.05084228515625, "logps/rejected": -348.5787658691406, "loss": 0.4673, "rewards/accuracies": 0.875, "rewards/chosen": -0.21136708557605743, "rewards/margins": 0.8256571292877197, "rewards/rejected": -1.0370242595672607, "step": 1737 }, { "epoch": 0.2, "learning_rate": 2.4356783331382415e-07, "logits/chosen": -3.5574638843536377, "logits/rejected": -3.2605228424072266, "logps/chosen": -156.2772674560547, "logps/rejected": -130.5850830078125, "loss": 0.2812, "rewards/accuracies": 0.875, "rewards/chosen": 0.44039177894592285, "rewards/margins": 1.5975128412246704, "rewards/rejected": -1.1571210622787476, "step": 1738 }, { "epoch": 0.2, "learning_rate": 2.435327168441999e-07, "logits/chosen": -3.084960460662842, "logits/rejected": -3.178116798400879, "logps/chosen": -142.37596130371094, "logps/rejected": -140.02943420410156, "loss": 0.5473, "rewards/accuracies": 0.875, "rewards/chosen": -0.10475844889879227, "rewards/margins": 1.0007109642028809, "rewards/rejected": -1.1054693460464478, "step": 1739 }, { "epoch": 0.2, "learning_rate": 2.4349760037457566e-07, "logits/chosen": -3.067857265472412, "logits/rejected": -3.519385814666748, "logps/chosen": -247.3993377685547, "logps/rejected": -305.21099853515625, "loss": 0.1719, "rewards/accuracies": 1.0, "rewards/chosen": -0.04354758560657501, "rewards/margins": 2.7898519039154053, "rewards/rejected": -2.833399772644043, "step": 1740 }, { "epoch": 0.2, "learning_rate": 2.434624839049514e-07, "logits/chosen": -3.057948112487793, "logits/rejected": -3.0198819637298584, "logps/chosen": -141.63870239257812, "logps/rejected": -297.5664978027344, "loss": 0.3248, "rewards/accuracies": 0.875, "rewards/chosen": -0.10936189442873001, "rewards/margins": 2.323464870452881, "rewards/rejected": -2.4328267574310303, "step": 1741 }, { "epoch": 0.2, "learning_rate": 2.4342736743532716e-07, "logits/chosen": -3.2447316646575928, "logits/rejected": -3.6341748237609863, "logps/chosen": -145.13330078125, "logps/rejected": -252.46206665039062, "loss": 0.4078, "rewards/accuracies": 0.75, "rewards/chosen": -0.12699754536151886, "rewards/margins": 1.1056121587753296, "rewards/rejected": -1.2326096296310425, "step": 1742 }, { "epoch": 0.2, "learning_rate": 2.433922509657029e-07, "logits/chosen": -3.209233283996582, "logits/rejected": -3.231194496154785, "logps/chosen": -150.76914978027344, "logps/rejected": -193.64834594726562, "loss": 0.3733, "rewards/accuracies": 0.875, "rewards/chosen": 0.17111843824386597, "rewards/margins": 1.3984018564224243, "rewards/rejected": -1.2272834777832031, "step": 1743 }, { "epoch": 0.2, "learning_rate": 2.4335713449607867e-07, "logits/chosen": -3.407757043838501, "logits/rejected": -3.283406972885132, "logps/chosen": -152.8195037841797, "logps/rejected": -162.044677734375, "loss": 0.474, "rewards/accuracies": 0.625, "rewards/chosen": -0.1427876055240631, "rewards/margins": 1.247788667678833, "rewards/rejected": -1.3905761241912842, "step": 1744 }, { "epoch": 0.2, "learning_rate": 2.433220180264544e-07, "logits/chosen": -3.3044116497039795, "logits/rejected": -3.735154867172241, "logps/chosen": -173.23471069335938, "logps/rejected": -206.9846649169922, "loss": 0.4777, "rewards/accuracies": 0.75, "rewards/chosen": 0.08235792070627213, "rewards/margins": 1.034445881843567, "rewards/rejected": -0.9520879983901978, "step": 1745 }, { "epoch": 0.2, "learning_rate": 2.4328690155683013e-07, "logits/chosen": -3.3846535682678223, "logits/rejected": -3.1004958152770996, "logps/chosen": -254.22317504882812, "logps/rejected": -308.4259033203125, "loss": 0.3208, "rewards/accuracies": 0.875, "rewards/chosen": -0.39039522409439087, "rewards/margins": 1.439897060394287, "rewards/rejected": -1.8302921056747437, "step": 1746 }, { "epoch": 0.2, "learning_rate": 2.432517850872059e-07, "logits/chosen": -3.0105745792388916, "logits/rejected": -2.939602851867676, "logps/chosen": -201.98741149902344, "logps/rejected": -240.71258544921875, "loss": 0.3625, "rewards/accuracies": 0.875, "rewards/chosen": 0.20477108657360077, "rewards/margins": 1.049839735031128, "rewards/rejected": -0.8450685739517212, "step": 1747 }, { "epoch": 0.2, "learning_rate": 2.4321666861758163e-07, "logits/chosen": -3.1395387649536133, "logits/rejected": -3.0131208896636963, "logps/chosen": -410.95562744140625, "logps/rejected": -329.695556640625, "loss": 0.1892, "rewards/accuracies": 1.0, "rewards/chosen": 0.15863315761089325, "rewards/margins": 1.9727343320846558, "rewards/rejected": -1.814100980758667, "step": 1748 }, { "epoch": 0.2, "learning_rate": 2.431815521479574e-07, "logits/chosen": -3.1851792335510254, "logits/rejected": -3.1301703453063965, "logps/chosen": -287.33148193359375, "logps/rejected": -360.2151794433594, "loss": 0.3618, "rewards/accuracies": 0.875, "rewards/chosen": -0.7716159820556641, "rewards/margins": 1.8083436489105225, "rewards/rejected": -2.5799596309661865, "step": 1749 }, { "epoch": 0.2, "learning_rate": 2.4314643567833314e-07, "logits/chosen": -3.543879508972168, "logits/rejected": -3.985924482345581, "logps/chosen": -221.46536254882812, "logps/rejected": -311.0965881347656, "loss": 0.1786, "rewards/accuracies": 1.0, "rewards/chosen": 0.8416170477867126, "rewards/margins": 2.7182304859161377, "rewards/rejected": -1.8766134977340698, "step": 1750 }, { "epoch": 0.2, "learning_rate": 2.4311131920870884e-07, "logits/chosen": -3.168619394302368, "logits/rejected": -3.195664405822754, "logps/chosen": -148.01206970214844, "logps/rejected": -181.02862548828125, "loss": 0.511, "rewards/accuracies": 0.75, "rewards/chosen": 0.20444738864898682, "rewards/margins": 0.570600688457489, "rewards/rejected": -0.3661532402038574, "step": 1751 }, { "epoch": 0.2, "learning_rate": 2.430762027390846e-07, "logits/chosen": -2.8880393505096436, "logits/rejected": -2.936306953430176, "logps/chosen": -169.31515502929688, "logps/rejected": -223.66928100585938, "loss": 0.7079, "rewards/accuracies": 0.625, "rewards/chosen": 0.06547676026821136, "rewards/margins": 0.41497230529785156, "rewards/rejected": -0.3494955003261566, "step": 1752 }, { "epoch": 0.2, "learning_rate": 2.430410862694604e-07, "logits/chosen": -2.6722052097320557, "logits/rejected": -2.9412734508514404, "logps/chosen": -224.75271606445312, "logps/rejected": -232.43492126464844, "loss": 0.5417, "rewards/accuracies": 0.75, "rewards/chosen": -0.3493640124797821, "rewards/margins": 1.1181914806365967, "rewards/rejected": -1.4675554037094116, "step": 1753 }, { "epoch": 0.2, "learning_rate": 2.430059697998361e-07, "logits/chosen": -3.102931499481201, "logits/rejected": -3.0654520988464355, "logps/chosen": -219.181640625, "logps/rejected": -202.31582641601562, "loss": 0.3232, "rewards/accuracies": 0.75, "rewards/chosen": 0.05130919814109802, "rewards/margins": 1.838759183883667, "rewards/rejected": -1.787449836730957, "step": 1754 }, { "epoch": 0.2, "learning_rate": 2.4297085333021186e-07, "logits/chosen": -3.968606472015381, "logits/rejected": -3.7249157428741455, "logps/chosen": -326.8946228027344, "logps/rejected": -296.5503234863281, "loss": 0.4936, "rewards/accuracies": 0.875, "rewards/chosen": -0.0016686655580997467, "rewards/margins": 1.3206899166107178, "rewards/rejected": -1.3223586082458496, "step": 1755 }, { "epoch": 0.2, "learning_rate": 2.429357368605876e-07, "logits/chosen": -2.62625789642334, "logits/rejected": -2.566938877105713, "logps/chosen": -366.4497985839844, "logps/rejected": -326.11541748046875, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": 0.12545126676559448, "rewards/margins": 2.252603054046631, "rewards/rejected": -2.1271517276763916, "step": 1756 }, { "epoch": 0.2, "learning_rate": 2.4290062039096337e-07, "logits/chosen": -2.5912160873413086, "logits/rejected": -2.966123104095459, "logps/chosen": -415.5066223144531, "logps/rejected": -333.67755126953125, "loss": 0.6167, "rewards/accuracies": 0.625, "rewards/chosen": -0.2567613124847412, "rewards/margins": 0.9944959878921509, "rewards/rejected": -1.251257300376892, "step": 1757 }, { "epoch": 0.2, "learning_rate": 2.428655039213391e-07, "logits/chosen": -3.5497848987579346, "logits/rejected": -3.349867820739746, "logps/chosen": -205.57315063476562, "logps/rejected": -155.82171630859375, "loss": 0.5414, "rewards/accuracies": 0.75, "rewards/chosen": -0.25648921728134155, "rewards/margins": 0.8320026397705078, "rewards/rejected": -1.0884917974472046, "step": 1758 }, { "epoch": 0.2, "learning_rate": 2.428303874517148e-07, "logits/chosen": -3.1462998390197754, "logits/rejected": -3.194227457046509, "logps/chosen": -282.48895263671875, "logps/rejected": -287.45025634765625, "loss": 0.6516, "rewards/accuracies": 0.625, "rewards/chosen": -0.273932546377182, "rewards/margins": 0.48718565702438354, "rewards/rejected": -0.7611181735992432, "step": 1759 }, { "epoch": 0.2, "learning_rate": 2.427952709820906e-07, "logits/chosen": -2.7691938877105713, "logits/rejected": -2.6362013816833496, "logps/chosen": -198.17068481445312, "logps/rejected": -255.22598266601562, "loss": 0.1862, "rewards/accuracies": 0.875, "rewards/chosen": 0.3451490104198456, "rewards/margins": 2.164900779724121, "rewards/rejected": -1.8197517395019531, "step": 1760 }, { "epoch": 0.2, "learning_rate": 2.4276015451246633e-07, "logits/chosen": -3.1371588706970215, "logits/rejected": -2.9938597679138184, "logps/chosen": -301.0254211425781, "logps/rejected": -191.77513122558594, "loss": 0.5222, "rewards/accuracies": 0.75, "rewards/chosen": -0.07813544571399689, "rewards/margins": 0.5883846879005432, "rewards/rejected": -0.6665201187133789, "step": 1761 }, { "epoch": 0.2, "learning_rate": 2.427250380428421e-07, "logits/chosen": -3.436466932296753, "logits/rejected": -3.3969531059265137, "logps/chosen": -234.4161376953125, "logps/rejected": -255.92367553710938, "loss": 0.2263, "rewards/accuracies": 0.875, "rewards/chosen": 0.10461831092834473, "rewards/margins": 2.507021188735962, "rewards/rejected": -2.402402639389038, "step": 1762 }, { "epoch": 0.2, "learning_rate": 2.4268992157321784e-07, "logits/chosen": -2.299417018890381, "logits/rejected": -2.2770919799804688, "logps/chosen": -356.7138366699219, "logps/rejected": -264.33868408203125, "loss": 0.4145, "rewards/accuracies": 0.875, "rewards/chosen": 0.055784545838832855, "rewards/margins": 1.0526140928268433, "rewards/rejected": -0.9968295097351074, "step": 1763 }, { "epoch": 0.2, "learning_rate": 2.4265480510359354e-07, "logits/chosen": -3.2793633937835693, "logits/rejected": -3.563754081726074, "logps/chosen": -231.0777587890625, "logps/rejected": -268.3797302246094, "loss": 0.2034, "rewards/accuracies": 1.0, "rewards/chosen": -0.2699204087257385, "rewards/margins": 2.083373785018921, "rewards/rejected": -2.3532941341400146, "step": 1764 }, { "epoch": 0.2, "learning_rate": 2.4261968863396934e-07, "logits/chosen": -2.8059375286102295, "logits/rejected": -2.6914925575256348, "logps/chosen": -362.2037353515625, "logps/rejected": -276.865478515625, "loss": 0.5461, "rewards/accuracies": 0.75, "rewards/chosen": 0.03437815606594086, "rewards/margins": 0.8932045698165894, "rewards/rejected": -0.8588263988494873, "step": 1765 }, { "epoch": 0.2, "learning_rate": 2.425845721643451e-07, "logits/chosen": -2.980407238006592, "logits/rejected": -2.9198200702667236, "logps/chosen": -241.3873291015625, "logps/rejected": -203.42881774902344, "loss": 0.3265, "rewards/accuracies": 0.875, "rewards/chosen": 0.1854814887046814, "rewards/margins": 1.3325400352478027, "rewards/rejected": -1.1470584869384766, "step": 1766 }, { "epoch": 0.2, "learning_rate": 2.425494556947208e-07, "logits/chosen": -2.8503692150115967, "logits/rejected": -2.576404333114624, "logps/chosen": -250.96826171875, "logps/rejected": -249.5489959716797, "loss": 0.6724, "rewards/accuracies": 0.625, "rewards/chosen": -0.2590703070163727, "rewards/margins": 0.9337478280067444, "rewards/rejected": -1.1928181648254395, "step": 1767 }, { "epoch": 0.2, "learning_rate": 2.4251433922509655e-07, "logits/chosen": -3.231877326965332, "logits/rejected": -3.326113700866699, "logps/chosen": -263.3506164550781, "logps/rejected": -364.3380126953125, "loss": 0.2686, "rewards/accuracies": 0.75, "rewards/chosen": -0.1327046900987625, "rewards/margins": 2.1899170875549316, "rewards/rejected": -2.3226218223571777, "step": 1768 }, { "epoch": 0.2, "learning_rate": 2.424792227554723e-07, "logits/chosen": -3.074404001235962, "logits/rejected": -3.400165319442749, "logps/chosen": -196.53921508789062, "logps/rejected": -223.18572998046875, "loss": 0.4385, "rewards/accuracies": 0.75, "rewards/chosen": 0.08249306678771973, "rewards/margins": 1.394784927368164, "rewards/rejected": -1.3122918605804443, "step": 1769 }, { "epoch": 0.2, "learning_rate": 2.4244410628584806e-07, "logits/chosen": -3.3985276222229004, "logits/rejected": -3.268855094909668, "logps/chosen": -205.62542724609375, "logps/rejected": -243.33216857910156, "loss": 0.1672, "rewards/accuracies": 0.875, "rewards/chosen": 0.0461481511592865, "rewards/margins": 2.5665283203125, "rewards/rejected": -2.5203800201416016, "step": 1770 }, { "epoch": 0.2, "learning_rate": 2.424089898162238e-07, "logits/chosen": -3.0864343643188477, "logits/rejected": -3.0680220127105713, "logps/chosen": -234.63291931152344, "logps/rejected": -234.44036865234375, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": 0.7403955459594727, "rewards/margins": 2.4730758666992188, "rewards/rejected": -1.7326805591583252, "step": 1771 }, { "epoch": 0.2, "learning_rate": 2.423738733465995e-07, "logits/chosen": -3.602097749710083, "logits/rejected": -3.4309592247009277, "logps/chosen": -254.64076232910156, "logps/rejected": -185.6962432861328, "loss": 0.3006, "rewards/accuracies": 0.875, "rewards/chosen": -0.10131794214248657, "rewards/margins": 1.7651445865631104, "rewards/rejected": -1.8664624691009521, "step": 1772 }, { "epoch": 0.2, "learning_rate": 2.4233875687697527e-07, "logits/chosen": -3.531369924545288, "logits/rejected": -3.3974971771240234, "logps/chosen": -229.64987182617188, "logps/rejected": -189.36959838867188, "loss": 0.6074, "rewards/accuracies": 0.75, "rewards/chosen": -0.5496854782104492, "rewards/margins": 0.9148344993591309, "rewards/rejected": -1.46451997756958, "step": 1773 }, { "epoch": 0.2, "learning_rate": 2.423036404073511e-07, "logits/chosen": -2.960831880569458, "logits/rejected": -2.939600944519043, "logps/chosen": -314.62371826171875, "logps/rejected": -248.36761474609375, "loss": 0.3668, "rewards/accuracies": 0.875, "rewards/chosen": 0.06180811673402786, "rewards/margins": 1.4093976020812988, "rewards/rejected": -1.3475894927978516, "step": 1774 }, { "epoch": 0.2, "learning_rate": 2.422685239377268e-07, "logits/chosen": -3.187558889389038, "logits/rejected": -2.793586015701294, "logps/chosen": -311.04852294921875, "logps/rejected": -264.7544250488281, "loss": 0.7614, "rewards/accuracies": 0.625, "rewards/chosen": -0.66237473487854, "rewards/margins": 0.4096294343471527, "rewards/rejected": -1.0720041990280151, "step": 1775 }, { "epoch": 0.2, "learning_rate": 2.4223340746810253e-07, "logits/chosen": -3.724465847015381, "logits/rejected": -3.553802251815796, "logps/chosen": -337.3489990234375, "logps/rejected": -285.360107421875, "loss": 0.9469, "rewards/accuracies": 0.5, "rewards/chosen": -0.595371425151825, "rewards/margins": 0.12990513443946838, "rewards/rejected": -0.725276529788971, "step": 1776 }, { "epoch": 0.2, "learning_rate": 2.421982909984783e-07, "logits/chosen": -3.092574119567871, "logits/rejected": -2.7128357887268066, "logps/chosen": -335.2371826171875, "logps/rejected": -387.388671875, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": 0.02709682285785675, "rewards/margins": 2.2923853397369385, "rewards/rejected": -2.2652883529663086, "step": 1777 }, { "epoch": 0.2, "learning_rate": 2.4216317452885404e-07, "logits/chosen": -3.393723964691162, "logits/rejected": -3.2259602546691895, "logps/chosen": -458.0068664550781, "logps/rejected": -289.3234558105469, "loss": 0.5024, "rewards/accuracies": 0.75, "rewards/chosen": 0.060724783688783646, "rewards/margins": 0.9477744698524475, "rewards/rejected": -0.887049674987793, "step": 1778 }, { "epoch": 0.21, "learning_rate": 2.421280580592298e-07, "logits/chosen": -2.087739944458008, "logits/rejected": -2.4449591636657715, "logps/chosen": -708.1103515625, "logps/rejected": -219.73358154296875, "loss": 0.4084, "rewards/accuracies": 0.875, "rewards/chosen": -0.1941252052783966, "rewards/margins": 1.3678836822509766, "rewards/rejected": -1.5620088577270508, "step": 1779 }, { "epoch": 0.21, "learning_rate": 2.420929415896055e-07, "logits/chosen": -2.605710983276367, "logits/rejected": -3.0942349433898926, "logps/chosen": -277.34613037109375, "logps/rejected": -361.6842041015625, "loss": 0.2663, "rewards/accuracies": 0.875, "rewards/chosen": 0.7183920741081238, "rewards/margins": 2.0134165287017822, "rewards/rejected": -1.2950243949890137, "step": 1780 }, { "epoch": 0.21, "learning_rate": 2.4205782511998125e-07, "logits/chosen": -2.2303647994995117, "logits/rejected": -2.428788185119629, "logps/chosen": -570.49560546875, "logps/rejected": -409.5047607421875, "loss": 0.606, "rewards/accuracies": 0.75, "rewards/chosen": -0.18222299218177795, "rewards/margins": 0.8466431498527527, "rewards/rejected": -1.0288660526275635, "step": 1781 }, { "epoch": 0.21, "learning_rate": 2.42022708650357e-07, "logits/chosen": -3.7756333351135254, "logits/rejected": -4.063043594360352, "logps/chosen": -313.5496520996094, "logps/rejected": -283.4927978515625, "loss": 0.4209, "rewards/accuracies": 0.75, "rewards/chosen": -0.08129837363958359, "rewards/margins": 2.766404628753662, "rewards/rejected": -2.847702980041504, "step": 1782 }, { "epoch": 0.21, "learning_rate": 2.4198759218073276e-07, "logits/chosen": -2.6750128269195557, "logits/rejected": -2.6356160640716553, "logps/chosen": -285.88848876953125, "logps/rejected": -291.33819580078125, "loss": 0.6745, "rewards/accuracies": 0.625, "rewards/chosen": 0.08191380649805069, "rewards/margins": 0.2532702088356018, "rewards/rejected": -0.1713564246892929, "step": 1783 }, { "epoch": 0.21, "learning_rate": 2.419524757111085e-07, "logits/chosen": -3.120246410369873, "logits/rejected": -3.0747880935668945, "logps/chosen": -249.4274444580078, "logps/rejected": -186.0854034423828, "loss": 0.6817, "rewards/accuracies": 0.625, "rewards/chosen": -0.2302771508693695, "rewards/margins": 0.22528696060180664, "rewards/rejected": -0.45556414127349854, "step": 1784 }, { "epoch": 0.21, "learning_rate": 2.419173592414842e-07, "logits/chosen": -3.5686984062194824, "logits/rejected": -3.3988704681396484, "logps/chosen": -272.94580078125, "logps/rejected": -291.3069763183594, "loss": 0.2544, "rewards/accuracies": 1.0, "rewards/chosen": 0.19512201845645905, "rewards/margins": 1.856284737586975, "rewards/rejected": -1.6611627340316772, "step": 1785 }, { "epoch": 0.21, "learning_rate": 2.4188224277185996e-07, "logits/chosen": -2.2110629081726074, "logits/rejected": -2.4659082889556885, "logps/chosen": -506.2607421875, "logps/rejected": -282.80792236328125, "loss": 0.6111, "rewards/accuracies": 0.625, "rewards/chosen": -0.5762144327163696, "rewards/margins": 0.8014272451400757, "rewards/rejected": -1.3776417970657349, "step": 1786 }, { "epoch": 0.21, "learning_rate": 2.4184712630223577e-07, "logits/chosen": -2.9010021686553955, "logits/rejected": -2.916661024093628, "logps/chosen": -250.02883911132812, "logps/rejected": -255.36077880859375, "loss": 0.3921, "rewards/accuracies": 0.625, "rewards/chosen": -0.5074890851974487, "rewards/margins": 1.1834965944290161, "rewards/rejected": -1.6909856796264648, "step": 1787 }, { "epoch": 0.21, "learning_rate": 2.4181200983261147e-07, "logits/chosen": -2.786046028137207, "logits/rejected": -3.1116459369659424, "logps/chosen": -268.6700744628906, "logps/rejected": -234.49667358398438, "loss": 0.6217, "rewards/accuracies": 0.625, "rewards/chosen": -0.220791757106781, "rewards/margins": 0.3611282706260681, "rewards/rejected": -0.5819200277328491, "step": 1788 }, { "epoch": 0.21, "learning_rate": 2.417768933629872e-07, "logits/chosen": -3.4463820457458496, "logits/rejected": -3.401262044906616, "logps/chosen": -364.42596435546875, "logps/rejected": -306.0172119140625, "loss": 0.2977, "rewards/accuracies": 1.0, "rewards/chosen": 0.18457360565662384, "rewards/margins": 1.403897762298584, "rewards/rejected": -1.2193242311477661, "step": 1789 }, { "epoch": 0.21, "learning_rate": 2.41741776893363e-07, "logits/chosen": -2.675659656524658, "logits/rejected": -2.673980712890625, "logps/chosen": -243.4378204345703, "logps/rejected": -305.2392883300781, "loss": 0.6264, "rewards/accuracies": 0.75, "rewards/chosen": 0.28893381357192993, "rewards/margins": 1.033085823059082, "rewards/rejected": -0.7441520690917969, "step": 1790 }, { "epoch": 0.21, "learning_rate": 2.4170666042373873e-07, "logits/chosen": -3.7508912086486816, "logits/rejected": -3.563750743865967, "logps/chosen": -174.86825561523438, "logps/rejected": -190.00830078125, "loss": 0.4222, "rewards/accuracies": 0.75, "rewards/chosen": -0.0638531893491745, "rewards/margins": 1.1816695928573608, "rewards/rejected": -1.2455228567123413, "step": 1791 }, { "epoch": 0.21, "learning_rate": 2.416715439541145e-07, "logits/chosen": -3.052027702331543, "logits/rejected": -3.2551157474517822, "logps/chosen": -415.6919860839844, "logps/rejected": -411.8841552734375, "loss": 0.6046, "rewards/accuracies": 0.875, "rewards/chosen": 0.43954360485076904, "rewards/margins": 1.5440583229064941, "rewards/rejected": -1.1045148372650146, "step": 1792 }, { "epoch": 0.21, "learning_rate": 2.416364274844902e-07, "logits/chosen": -2.98317289352417, "logits/rejected": -2.884835958480835, "logps/chosen": -298.021484375, "logps/rejected": -192.96871948242188, "loss": 0.4656, "rewards/accuracies": 0.75, "rewards/chosen": -0.15477770566940308, "rewards/margins": 1.5699098110198975, "rewards/rejected": -1.7246874570846558, "step": 1793 }, { "epoch": 0.21, "learning_rate": 2.4160131101486594e-07, "logits/chosen": -3.640326499938965, "logits/rejected": -3.507678985595703, "logps/chosen": -203.41212463378906, "logps/rejected": -143.59127807617188, "loss": 0.5265, "rewards/accuracies": 0.875, "rewards/chosen": -0.2634912431240082, "rewards/margins": 1.0048516988754272, "rewards/rejected": -1.2683429718017578, "step": 1794 }, { "epoch": 0.21, "learning_rate": 2.415661945452417e-07, "logits/chosen": -2.677821636199951, "logits/rejected": -2.4985947608947754, "logps/chosen": -368.5274658203125, "logps/rejected": -215.2764129638672, "loss": 0.2058, "rewards/accuracies": 1.0, "rewards/chosen": 0.5737054347991943, "rewards/margins": 2.0907516479492188, "rewards/rejected": -1.5170462131500244, "step": 1795 }, { "epoch": 0.21, "learning_rate": 2.4153107807561745e-07, "logits/chosen": -3.1011600494384766, "logits/rejected": -2.89839506149292, "logps/chosen": -129.42120361328125, "logps/rejected": -184.5872039794922, "loss": 0.4552, "rewards/accuracies": 0.625, "rewards/chosen": 0.023982517421245575, "rewards/margins": 0.815593957901001, "rewards/rejected": -0.7916114330291748, "step": 1796 }, { "epoch": 0.21, "learning_rate": 2.414959616059932e-07, "logits/chosen": -2.995195150375366, "logits/rejected": -2.937110424041748, "logps/chosen": -414.18896484375, "logps/rejected": -450.25115966796875, "loss": 0.5461, "rewards/accuracies": 0.75, "rewards/chosen": -0.10179929435253143, "rewards/margins": 0.9477005004882812, "rewards/rejected": -1.0494998693466187, "step": 1797 }, { "epoch": 0.21, "learning_rate": 2.4146084513636896e-07, "logits/chosen": -3.33390212059021, "logits/rejected": -3.1867642402648926, "logps/chosen": -207.8813018798828, "logps/rejected": -312.7217102050781, "loss": 0.2633, "rewards/accuracies": 0.875, "rewards/chosen": -0.1668022871017456, "rewards/margins": 2.1615781784057617, "rewards/rejected": -2.328380584716797, "step": 1798 }, { "epoch": 0.21, "learning_rate": 2.414257286667447e-07, "logits/chosen": -3.5038247108459473, "logits/rejected": -3.532654047012329, "logps/chosen": -163.7206573486328, "logps/rejected": -285.15093994140625, "loss": 0.3881, "rewards/accuracies": 0.875, "rewards/chosen": 0.49594974517822266, "rewards/margins": 1.4398472309112549, "rewards/rejected": -0.9438974857330322, "step": 1799 }, { "epoch": 0.21, "learning_rate": 2.4139061219712046e-07, "logits/chosen": -3.6769142150878906, "logits/rejected": -3.689378023147583, "logps/chosen": -215.927978515625, "logps/rejected": -210.45753479003906, "loss": 0.3028, "rewards/accuracies": 0.875, "rewards/chosen": 0.24080538749694824, "rewards/margins": 1.33527410030365, "rewards/rejected": -1.0944687128067017, "step": 1800 }, { "epoch": 0.21, "learning_rate": 2.4135549572749617e-07, "logits/chosen": -2.9768176078796387, "logits/rejected": -2.789314031600952, "logps/chosen": -262.72369384765625, "logps/rejected": -140.95094299316406, "loss": 0.7002, "rewards/accuracies": 0.875, "rewards/chosen": -0.15552113950252533, "rewards/margins": 0.8602361679077148, "rewards/rejected": -1.0157573223114014, "step": 1801 }, { "epoch": 0.21, "learning_rate": 2.413203792578719e-07, "logits/chosen": -3.4500019550323486, "logits/rejected": -3.491673231124878, "logps/chosen": -341.31494140625, "logps/rejected": -317.71319580078125, "loss": 0.2155, "rewards/accuracies": 1.0, "rewards/chosen": 0.15010561048984528, "rewards/margins": 2.0146870613098145, "rewards/rejected": -1.8645814657211304, "step": 1802 }, { "epoch": 0.21, "learning_rate": 2.4128526278824767e-07, "logits/chosen": -2.9675090312957764, "logits/rejected": -2.915804862976074, "logps/chosen": -296.2667236328125, "logps/rejected": -222.76336669921875, "loss": 0.3732, "rewards/accuracies": 0.875, "rewards/chosen": -0.07646234333515167, "rewards/margins": 1.3029041290283203, "rewards/rejected": -1.379366397857666, "step": 1803 }, { "epoch": 0.21, "learning_rate": 2.4125014631862343e-07, "logits/chosen": -3.206181526184082, "logits/rejected": -3.2034802436828613, "logps/chosen": -90.62210845947266, "logps/rejected": -165.38406372070312, "loss": 0.5434, "rewards/accuracies": 0.625, "rewards/chosen": -0.2525031566619873, "rewards/margins": 0.9475864768028259, "rewards/rejected": -1.200089693069458, "step": 1804 }, { "epoch": 0.21, "learning_rate": 2.412150298489992e-07, "logits/chosen": -3.146010160446167, "logits/rejected": -3.005582809448242, "logps/chosen": -281.2833557128906, "logps/rejected": -298.551025390625, "loss": 0.3157, "rewards/accuracies": 1.0, "rewards/chosen": -0.08163248002529144, "rewards/margins": 1.2042107582092285, "rewards/rejected": -1.2858431339263916, "step": 1805 }, { "epoch": 0.21, "learning_rate": 2.411799133793749e-07, "logits/chosen": -2.9047060012817383, "logits/rejected": -2.8392202854156494, "logps/chosen": -197.58004760742188, "logps/rejected": -340.43170166015625, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": -0.16672877967357635, "rewards/margins": 1.4568045139312744, "rewards/rejected": -1.6235331296920776, "step": 1806 }, { "epoch": 0.21, "learning_rate": 2.4114479690975064e-07, "logits/chosen": -2.1913044452667236, "logits/rejected": -2.2105109691619873, "logps/chosen": -245.36485290527344, "logps/rejected": -233.07164001464844, "loss": 0.5469, "rewards/accuracies": 0.75, "rewards/chosen": -0.504901111125946, "rewards/margins": 1.3426004648208618, "rewards/rejected": -1.8475016355514526, "step": 1807 }, { "epoch": 0.21, "learning_rate": 2.4110968044012644e-07, "logits/chosen": -2.7578930854797363, "logits/rejected": -2.7092695236206055, "logps/chosen": -233.57481384277344, "logps/rejected": -266.0731201171875, "loss": 0.3252, "rewards/accuracies": 1.0, "rewards/chosen": -0.5472968816757202, "rewards/margins": 1.5048975944519043, "rewards/rejected": -2.052194595336914, "step": 1808 }, { "epoch": 0.21, "learning_rate": 2.4107456397050214e-07, "logits/chosen": -2.4891245365142822, "logits/rejected": -2.4622931480407715, "logps/chosen": -384.259765625, "logps/rejected": -318.8699951171875, "loss": 0.2145, "rewards/accuracies": 1.0, "rewards/chosen": 0.35938915610313416, "rewards/margins": 2.066375494003296, "rewards/rejected": -1.7069863080978394, "step": 1809 }, { "epoch": 0.21, "learning_rate": 2.410394475008779e-07, "logits/chosen": -3.166712999343872, "logits/rejected": -3.004619836807251, "logps/chosen": -267.0263977050781, "logps/rejected": -197.66006469726562, "loss": 0.7152, "rewards/accuracies": 0.75, "rewards/chosen": -0.46882912516593933, "rewards/margins": 0.5559472441673279, "rewards/rejected": -1.0247764587402344, "step": 1810 }, { "epoch": 0.21, "learning_rate": 2.4100433103125365e-07, "logits/chosen": -3.61323618888855, "logits/rejected": -3.492032766342163, "logps/chosen": -282.75213623046875, "logps/rejected": -256.89862060546875, "loss": 0.3741, "rewards/accuracies": 0.875, "rewards/chosen": 0.12818875908851624, "rewards/margins": 1.2939426898956299, "rewards/rejected": -1.1657540798187256, "step": 1811 }, { "epoch": 0.21, "learning_rate": 2.409692145616294e-07, "logits/chosen": -2.9817934036254883, "logits/rejected": -3.2869231700897217, "logps/chosen": -239.7997589111328, "logps/rejected": -186.92274475097656, "loss": 0.3028, "rewards/accuracies": 0.75, "rewards/chosen": 0.2774461805820465, "rewards/margins": 1.5824965238571167, "rewards/rejected": -1.305050253868103, "step": 1812 }, { "epoch": 0.21, "learning_rate": 2.4093409809200516e-07, "logits/chosen": -2.571326732635498, "logits/rejected": -2.466388702392578, "logps/chosen": -340.9006652832031, "logps/rejected": -415.9167175292969, "loss": 0.2401, "rewards/accuracies": 0.875, "rewards/chosen": -0.17888230085372925, "rewards/margins": 2.3979411125183105, "rewards/rejected": -2.5768234729766846, "step": 1813 }, { "epoch": 0.21, "learning_rate": 2.4089898162238086e-07, "logits/chosen": -2.92287015914917, "logits/rejected": -2.8410611152648926, "logps/chosen": -268.87286376953125, "logps/rejected": -192.04782104492188, "loss": 0.2925, "rewards/accuracies": 0.875, "rewards/chosen": 0.29529693722724915, "rewards/margins": 1.6396294832229614, "rewards/rejected": -1.3443325757980347, "step": 1814 }, { "epoch": 0.21, "learning_rate": 2.408638651527566e-07, "logits/chosen": -2.401008129119873, "logits/rejected": -2.5115413665771484, "logps/chosen": -259.52630615234375, "logps/rejected": -272.1788330078125, "loss": 0.6156, "rewards/accuracies": 0.625, "rewards/chosen": 0.027551118284463882, "rewards/margins": 0.5791054964065552, "rewards/rejected": -0.5515543222427368, "step": 1815 }, { "epoch": 0.21, "learning_rate": 2.4082874868313237e-07, "logits/chosen": -3.01741361618042, "logits/rejected": -2.624587059020996, "logps/chosen": -318.7940979003906, "logps/rejected": -301.8634033203125, "loss": 0.399, "rewards/accuracies": 0.875, "rewards/chosen": -0.017174065113067627, "rewards/margins": 0.9977447986602783, "rewards/rejected": -1.0149188041687012, "step": 1816 }, { "epoch": 0.21, "learning_rate": 2.407936322135081e-07, "logits/chosen": -2.6454830169677734, "logits/rejected": -2.621281623840332, "logps/chosen": -373.0331726074219, "logps/rejected": -221.32321166992188, "loss": 0.4632, "rewards/accuracies": 0.75, "rewards/chosen": 0.1358024924993515, "rewards/margins": 1.1383870840072632, "rewards/rejected": -1.0025845766067505, "step": 1817 }, { "epoch": 0.21, "learning_rate": 2.407585157438839e-07, "logits/chosen": -2.74299955368042, "logits/rejected": -2.533597946166992, "logps/chosen": -437.7091064453125, "logps/rejected": -197.56243896484375, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": 0.1875694990158081, "rewards/margins": 0.5917748212814331, "rewards/rejected": -0.4042053520679474, "step": 1818 }, { "epoch": 0.21, "learning_rate": 2.4072339927425963e-07, "logits/chosen": -3.31374454498291, "logits/rejected": -3.2132441997528076, "logps/chosen": -355.820556640625, "logps/rejected": -280.9417419433594, "loss": 0.4856, "rewards/accuracies": 0.625, "rewards/chosen": -0.18488293886184692, "rewards/margins": 1.6677491664886475, "rewards/rejected": -1.8526321649551392, "step": 1819 }, { "epoch": 0.21, "learning_rate": 2.4068828280463533e-07, "logits/chosen": -2.425079107284546, "logits/rejected": -2.5127720832824707, "logps/chosen": -351.137939453125, "logps/rejected": -416.93988037109375, "loss": 0.3254, "rewards/accuracies": 0.875, "rewards/chosen": 0.34431570768356323, "rewards/margins": 1.907684326171875, "rewards/rejected": -1.563368558883667, "step": 1820 }, { "epoch": 0.21, "learning_rate": 2.4065316633501114e-07, "logits/chosen": -2.5490379333496094, "logits/rejected": -2.9291326999664307, "logps/chosen": -403.2880554199219, "logps/rejected": -343.7261962890625, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -0.10454273223876953, "rewards/margins": 1.8806912899017334, "rewards/rejected": -1.9852339029312134, "step": 1821 }, { "epoch": 0.21, "learning_rate": 2.4061804986538684e-07, "logits/chosen": -2.7972593307495117, "logits/rejected": -3.167428970336914, "logps/chosen": -323.6942138671875, "logps/rejected": -258.0151062011719, "loss": 0.4117, "rewards/accuracies": 1.0, "rewards/chosen": 0.07030066847801208, "rewards/margins": 0.844513475894928, "rewards/rejected": -0.7742128372192383, "step": 1822 }, { "epoch": 0.21, "learning_rate": 2.405829333957626e-07, "logits/chosen": -3.4452574253082275, "logits/rejected": -3.116353750228882, "logps/chosen": -304.1138000488281, "logps/rejected": -317.852294921875, "loss": 0.251, "rewards/accuracies": 1.0, "rewards/chosen": 0.0007074326276779175, "rewards/margins": 1.9118893146514893, "rewards/rejected": -1.911181926727295, "step": 1823 }, { "epoch": 0.21, "learning_rate": 2.4054781692613835e-07, "logits/chosen": -3.1927690505981445, "logits/rejected": -3.075185775756836, "logps/chosen": -103.44029998779297, "logps/rejected": -163.81784057617188, "loss": 0.4502, "rewards/accuracies": 0.875, "rewards/chosen": 0.30610334873199463, "rewards/margins": 0.6559350490570068, "rewards/rejected": -0.349831759929657, "step": 1824 }, { "epoch": 0.21, "learning_rate": 2.405127004565141e-07, "logits/chosen": -3.270399808883667, "logits/rejected": -3.2742762565612793, "logps/chosen": -150.1024932861328, "logps/rejected": -242.41928100585938, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": 0.5639007091522217, "rewards/margins": 2.171006679534912, "rewards/rejected": -1.6071059703826904, "step": 1825 }, { "epoch": 0.21, "learning_rate": 2.4047758398688985e-07, "logits/chosen": -2.6011955738067627, "logits/rejected": -2.495676040649414, "logps/chosen": -331.3778076171875, "logps/rejected": -354.8858642578125, "loss": 0.4295, "rewards/accuracies": 0.625, "rewards/chosen": -0.2116556167602539, "rewards/margins": 1.261932373046875, "rewards/rejected": -1.473587989807129, "step": 1826 }, { "epoch": 0.21, "learning_rate": 2.404424675172656e-07, "logits/chosen": -3.067300319671631, "logits/rejected": -3.2956361770629883, "logps/chosen": -246.4269561767578, "logps/rejected": -292.41375732421875, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": 0.5071303844451904, "rewards/margins": 3.2434682846069336, "rewards/rejected": -2.736337900161743, "step": 1827 }, { "epoch": 0.21, "learning_rate": 2.404073510476413e-07, "logits/chosen": -3.154561996459961, "logits/rejected": -3.0340800285339355, "logps/chosen": -103.849365234375, "logps/rejected": -179.877197265625, "loss": 0.3974, "rewards/accuracies": 0.75, "rewards/chosen": -0.20260120928287506, "rewards/margins": 1.3019003868103027, "rewards/rejected": -1.504501461982727, "step": 1828 }, { "epoch": 0.21, "learning_rate": 2.4037223457801706e-07, "logits/chosen": -3.667299747467041, "logits/rejected": -3.7130157947540283, "logps/chosen": -198.6332244873047, "logps/rejected": -236.09280395507812, "loss": 0.6574, "rewards/accuracies": 0.625, "rewards/chosen": -0.42137086391448975, "rewards/margins": 0.6130428314208984, "rewards/rejected": -1.0344136953353882, "step": 1829 }, { "epoch": 0.21, "learning_rate": 2.403371181083928e-07, "logits/chosen": -3.575718879699707, "logits/rejected": -3.613237142562866, "logps/chosen": -203.17555236816406, "logps/rejected": -190.31185913085938, "loss": 0.3735, "rewards/accuracies": 0.875, "rewards/chosen": 0.14820124208927155, "rewards/margins": 1.7944077253341675, "rewards/rejected": -1.6462064981460571, "step": 1830 }, { "epoch": 0.21, "learning_rate": 2.4030200163876857e-07, "logits/chosen": -3.8135619163513184, "logits/rejected": -3.779196262359619, "logps/chosen": -187.3614959716797, "logps/rejected": -241.08596801757812, "loss": 0.7252, "rewards/accuracies": 0.625, "rewards/chosen": -0.2534208297729492, "rewards/margins": 1.5497138500213623, "rewards/rejected": -1.803134799003601, "step": 1831 }, { "epoch": 0.21, "learning_rate": 2.402668851691443e-07, "logits/chosen": -3.3541243076324463, "logits/rejected": -3.7305502891540527, "logps/chosen": -163.34739685058594, "logps/rejected": -271.3277587890625, "loss": 0.2919, "rewards/accuracies": 0.75, "rewards/chosen": 0.2380848377943039, "rewards/margins": 1.9160436391830444, "rewards/rejected": -1.6779589653015137, "step": 1832 }, { "epoch": 0.21, "learning_rate": 2.402317686995201e-07, "logits/chosen": -3.0275893211364746, "logits/rejected": -3.245628833770752, "logps/chosen": -399.3750915527344, "logps/rejected": -312.2113342285156, "loss": 0.4461, "rewards/accuracies": 0.625, "rewards/chosen": 0.201968252658844, "rewards/margins": 2.2023918628692627, "rewards/rejected": -2.0004236698150635, "step": 1833 }, { "epoch": 0.21, "learning_rate": 2.4019665222989583e-07, "logits/chosen": -2.775270700454712, "logits/rejected": -2.5878751277923584, "logps/chosen": -206.10665893554688, "logps/rejected": -223.36859130859375, "loss": 0.6056, "rewards/accuracies": 0.875, "rewards/chosen": 0.5078791379928589, "rewards/margins": 0.5788403749465942, "rewards/rejected": -0.07096125185489655, "step": 1834 }, { "epoch": 0.21, "learning_rate": 2.401615357602716e-07, "logits/chosen": -3.1379141807556152, "logits/rejected": -2.928715944290161, "logps/chosen": -225.46835327148438, "logps/rejected": -395.69549560546875, "loss": 0.4616, "rewards/accuracies": 0.625, "rewards/chosen": -0.0617222785949707, "rewards/margins": 1.119102954864502, "rewards/rejected": -1.1808249950408936, "step": 1835 }, { "epoch": 0.21, "learning_rate": 2.401264192906473e-07, "logits/chosen": -2.3342316150665283, "logits/rejected": -2.241162061691284, "logps/chosen": -264.0896301269531, "logps/rejected": -215.17123413085938, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": 0.026220127940177917, "rewards/margins": 0.5930976867675781, "rewards/rejected": -0.5668774843215942, "step": 1836 }, { "epoch": 0.21, "learning_rate": 2.4009130282102304e-07, "logits/chosen": -3.4868226051330566, "logits/rejected": -3.579270839691162, "logps/chosen": -132.08517456054688, "logps/rejected": -195.231201171875, "loss": 0.901, "rewards/accuracies": 0.625, "rewards/chosen": -0.43552449345588684, "rewards/margins": 0.39090240001678467, "rewards/rejected": -0.8264269232749939, "step": 1837 }, { "epoch": 0.21, "learning_rate": 2.400561863513988e-07, "logits/chosen": -3.0242526531219482, "logits/rejected": -3.15548038482666, "logps/chosen": -339.38232421875, "logps/rejected": -273.70025634765625, "loss": 0.1951, "rewards/accuracies": 1.0, "rewards/chosen": 0.18326154351234436, "rewards/margins": 2.3434157371520996, "rewards/rejected": -2.160154342651367, "step": 1838 }, { "epoch": 0.21, "learning_rate": 2.4002106988177455e-07, "logits/chosen": -2.6112544536590576, "logits/rejected": -2.499181032180786, "logps/chosen": -347.30743408203125, "logps/rejected": -227.7771453857422, "loss": 0.4463, "rewards/accuracies": 0.75, "rewards/chosen": 0.267295777797699, "rewards/margins": 0.9837774634361267, "rewards/rejected": -0.7164817452430725, "step": 1839 }, { "epoch": 0.21, "learning_rate": 2.399859534121503e-07, "logits/chosen": -2.925374984741211, "logits/rejected": -3.1526970863342285, "logps/chosen": -311.2057189941406, "logps/rejected": -311.64654541015625, "loss": 0.8433, "rewards/accuracies": 0.5, "rewards/chosen": -0.9040330648422241, "rewards/margins": 0.21446117758750916, "rewards/rejected": -1.1184941530227661, "step": 1840 }, { "epoch": 0.21, "learning_rate": 2.39950836942526e-07, "logits/chosen": -3.2247226238250732, "logits/rejected": -3.1036572456359863, "logps/chosen": -169.89695739746094, "logps/rejected": -182.72891235351562, "loss": 0.4789, "rewards/accuracies": 0.875, "rewards/chosen": 0.3294336795806885, "rewards/margins": 1.4052774906158447, "rewards/rejected": -1.0758436918258667, "step": 1841 }, { "epoch": 0.21, "learning_rate": 2.399157204729018e-07, "logits/chosen": -3.5640523433685303, "logits/rejected": -3.4269490242004395, "logps/chosen": -176.86865234375, "logps/rejected": -167.06423950195312, "loss": 0.3912, "rewards/accuracies": 0.875, "rewards/chosen": 0.09696812927722931, "rewards/margins": 1.6871545314788818, "rewards/rejected": -1.590186357498169, "step": 1842 }, { "epoch": 0.21, "learning_rate": 2.3988060400327756e-07, "logits/chosen": -3.465142250061035, "logits/rejected": -3.2102460861206055, "logps/chosen": -360.8543395996094, "logps/rejected": -277.5147705078125, "loss": 0.287, "rewards/accuracies": 1.0, "rewards/chosen": -0.2871372699737549, "rewards/margins": 1.8692712783813477, "rewards/rejected": -2.1564083099365234, "step": 1843 }, { "epoch": 0.21, "learning_rate": 2.3984548753365326e-07, "logits/chosen": -3.891622304916382, "logits/rejected": -3.6739211082458496, "logps/chosen": -260.8900146484375, "logps/rejected": -242.9789581298828, "loss": 0.3143, "rewards/accuracies": 0.875, "rewards/chosen": -0.4440181851387024, "rewards/margins": 1.4399336576461792, "rewards/rejected": -1.8839519023895264, "step": 1844 }, { "epoch": 0.21, "learning_rate": 2.39810371064029e-07, "logits/chosen": -2.6025052070617676, "logits/rejected": -2.844527244567871, "logps/chosen": -85.60796356201172, "logps/rejected": -203.6422119140625, "loss": 0.4577, "rewards/accuracies": 0.75, "rewards/chosen": 0.3265523910522461, "rewards/margins": 1.0658100843429565, "rewards/rejected": -0.7392577528953552, "step": 1845 }, { "epoch": 0.21, "learning_rate": 2.3977525459440477e-07, "logits/chosen": -3.266981601715088, "logits/rejected": -3.445608615875244, "logps/chosen": -105.17025756835938, "logps/rejected": -199.04888916015625, "loss": 0.2707, "rewards/accuracies": 1.0, "rewards/chosen": 0.19062800705432892, "rewards/margins": 1.6527048349380493, "rewards/rejected": -1.4620769023895264, "step": 1846 }, { "epoch": 0.21, "learning_rate": 2.397401381247805e-07, "logits/chosen": -3.5979175567626953, "logits/rejected": -3.4828290939331055, "logps/chosen": -354.52301025390625, "logps/rejected": -381.6019287109375, "loss": 0.5469, "rewards/accuracies": 0.625, "rewards/chosen": -0.558978796005249, "rewards/margins": 1.3851336240768433, "rewards/rejected": -1.9441125392913818, "step": 1847 }, { "epoch": 0.21, "learning_rate": 2.397050216551563e-07, "logits/chosen": -2.402862310409546, "logits/rejected": -2.5514373779296875, "logps/chosen": -198.13714599609375, "logps/rejected": -177.84286499023438, "loss": 0.3056, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012534111738204956, "rewards/margins": 1.4720139503479004, "rewards/rejected": -1.4732673168182373, "step": 1848 }, { "epoch": 0.21, "learning_rate": 2.39669905185532e-07, "logits/chosen": -3.577563524246216, "logits/rejected": -3.393393039703369, "logps/chosen": -118.42765808105469, "logps/rejected": -131.41741943359375, "loss": 0.4426, "rewards/accuracies": 0.875, "rewards/chosen": 0.5391978025436401, "rewards/margins": 0.9912474155426025, "rewards/rejected": -0.45204958319664, "step": 1849 }, { "epoch": 0.21, "learning_rate": 2.3963478871590773e-07, "logits/chosen": -2.7539124488830566, "logits/rejected": -3.013728618621826, "logps/chosen": -168.27194213867188, "logps/rejected": -282.2645263671875, "loss": 0.3516, "rewards/accuracies": 0.875, "rewards/chosen": 0.14810240268707275, "rewards/margins": 1.590822458267212, "rewards/rejected": -1.4427201747894287, "step": 1850 }, { "epoch": 0.21, "learning_rate": 2.395996722462835e-07, "logits/chosen": -2.8367085456848145, "logits/rejected": -3.052182912826538, "logps/chosen": -207.71893310546875, "logps/rejected": -289.6271057128906, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": -0.37533995509147644, "rewards/margins": 1.6185909509658813, "rewards/rejected": -1.9939309358596802, "step": 1851 }, { "epoch": 0.21, "learning_rate": 2.3956455577665924e-07, "logits/chosen": -2.9096243381500244, "logits/rejected": -2.9823153018951416, "logps/chosen": -365.6651916503906, "logps/rejected": -149.75221252441406, "loss": 0.6267, "rewards/accuracies": 0.625, "rewards/chosen": 0.25319188833236694, "rewards/margins": 1.0111883878707886, "rewards/rejected": -0.7579965591430664, "step": 1852 }, { "epoch": 0.21, "learning_rate": 2.39529439307035e-07, "logits/chosen": -3.22668719291687, "logits/rejected": -3.1928296089172363, "logps/chosen": -265.7268371582031, "logps/rejected": -198.2104034423828, "loss": 0.582, "rewards/accuracies": 0.625, "rewards/chosen": -0.08171147853136063, "rewards/margins": 0.861680269241333, "rewards/rejected": -0.9433917999267578, "step": 1853 }, { "epoch": 0.21, "learning_rate": 2.394943228374107e-07, "logits/chosen": -3.2386631965637207, "logits/rejected": -3.243152379989624, "logps/chosen": -433.9160461425781, "logps/rejected": -225.78988647460938, "loss": 0.319, "rewards/accuracies": 0.75, "rewards/chosen": 0.29755592346191406, "rewards/margins": 1.500485897064209, "rewards/rejected": -1.202929973602295, "step": 1854 }, { "epoch": 0.21, "learning_rate": 2.394592063677865e-07, "logits/chosen": -3.030597448348999, "logits/rejected": -2.704895496368408, "logps/chosen": -535.2483520507812, "logps/rejected": -387.57000732421875, "loss": 0.4871, "rewards/accuracies": 0.625, "rewards/chosen": -0.07267952710390091, "rewards/margins": 1.3345167636871338, "rewards/rejected": -1.407196283340454, "step": 1855 }, { "epoch": 0.21, "learning_rate": 2.3942408989816226e-07, "logits/chosen": -3.2048325538635254, "logits/rejected": -3.3020262718200684, "logps/chosen": -335.4111022949219, "logps/rejected": -333.7204895019531, "loss": 0.3778, "rewards/accuracies": 0.75, "rewards/chosen": 0.406870037317276, "rewards/margins": 1.393203616142273, "rewards/rejected": -0.9863336086273193, "step": 1856 }, { "epoch": 0.21, "learning_rate": 2.3938897342853796e-07, "logits/chosen": -3.2815136909484863, "logits/rejected": -3.267902374267578, "logps/chosen": -336.240966796875, "logps/rejected": -300.3442077636719, "loss": 0.275, "rewards/accuracies": 0.875, "rewards/chosen": 0.1589762568473816, "rewards/margins": 1.7175028324127197, "rewards/rejected": -1.5585265159606934, "step": 1857 }, { "epoch": 0.21, "learning_rate": 2.393538569589137e-07, "logits/chosen": -2.693025588989258, "logits/rejected": -2.3875937461853027, "logps/chosen": -210.38687133789062, "logps/rejected": -232.27613830566406, "loss": 0.3787, "rewards/accuracies": 0.875, "rewards/chosen": 0.10163797438144684, "rewards/margins": 1.0402240753173828, "rewards/rejected": -0.9385862350463867, "step": 1858 }, { "epoch": 0.21, "learning_rate": 2.3931874048928947e-07, "logits/chosen": -2.6612093448638916, "logits/rejected": -2.688091516494751, "logps/chosen": -183.9331512451172, "logps/rejected": -155.62789916992188, "loss": 0.5004, "rewards/accuracies": 0.5, "rewards/chosen": -0.052919209003448486, "rewards/margins": 0.9086102247238159, "rewards/rejected": -0.9615294933319092, "step": 1859 }, { "epoch": 0.21, "learning_rate": 2.392836240196652e-07, "logits/chosen": -2.8398513793945312, "logits/rejected": -2.814011573791504, "logps/chosen": -285.54852294921875, "logps/rejected": -322.54205322265625, "loss": 0.3787, "rewards/accuracies": 0.75, "rewards/chosen": -0.35564279556274414, "rewards/margins": 1.1657389402389526, "rewards/rejected": -1.5213816165924072, "step": 1860 }, { "epoch": 0.21, "learning_rate": 2.3924850755004097e-07, "logits/chosen": -3.555253505706787, "logits/rejected": -3.5419552326202393, "logps/chosen": -316.2832946777344, "logps/rejected": -189.86831665039062, "loss": 0.2559, "rewards/accuracies": 0.875, "rewards/chosen": 0.24905207753181458, "rewards/margins": 2.1671640872955322, "rewards/rejected": -1.91811203956604, "step": 1861 }, { "epoch": 0.21, "learning_rate": 2.392133910804167e-07, "logits/chosen": -3.052665948867798, "logits/rejected": -2.729041814804077, "logps/chosen": -315.60882568359375, "logps/rejected": -241.69515991210938, "loss": 0.6393, "rewards/accuracies": 0.875, "rewards/chosen": -0.304241418838501, "rewards/margins": 0.6388493776321411, "rewards/rejected": -0.9430907964706421, "step": 1862 }, { "epoch": 0.21, "learning_rate": 2.3917827461079243e-07, "logits/chosen": -2.755173921585083, "logits/rejected": -2.790940523147583, "logps/chosen": -305.3826904296875, "logps/rejected": -304.3145751953125, "loss": 0.4328, "rewards/accuracies": 0.625, "rewards/chosen": -0.2234201729297638, "rewards/margins": 2.2677574157714844, "rewards/rejected": -2.491177558898926, "step": 1863 }, { "epoch": 0.21, "learning_rate": 2.3914315814116823e-07, "logits/chosen": -2.8260200023651123, "logits/rejected": -3.0565593242645264, "logps/chosen": -183.84423828125, "logps/rejected": -411.9515380859375, "loss": 0.368, "rewards/accuracies": 0.75, "rewards/chosen": -0.04081929102540016, "rewards/margins": 2.799703598022461, "rewards/rejected": -2.8405227661132812, "step": 1864 }, { "epoch": 0.21, "learning_rate": 2.3910804167154394e-07, "logits/chosen": -2.693484306335449, "logits/rejected": -2.95511531829834, "logps/chosen": -192.39453125, "logps/rejected": -341.84967041015625, "loss": 1.0245, "rewards/accuracies": 0.25, "rewards/chosen": -0.5181946754455566, "rewards/margins": -0.006462007761001587, "rewards/rejected": -0.5117326974868774, "step": 1865 }, { "epoch": 0.22, "learning_rate": 2.390729252019197e-07, "logits/chosen": -3.240333318710327, "logits/rejected": -3.312986135482788, "logps/chosen": -261.5600280761719, "logps/rejected": -252.7403564453125, "loss": 0.4648, "rewards/accuracies": 0.75, "rewards/chosen": -0.43903714418411255, "rewards/margins": 1.3385133743286133, "rewards/rejected": -1.777550458908081, "step": 1866 }, { "epoch": 0.22, "learning_rate": 2.3903780873229544e-07, "logits/chosen": -2.849130153656006, "logits/rejected": -2.850721836090088, "logps/chosen": -211.24868774414062, "logps/rejected": -215.11126708984375, "loss": 0.655, "rewards/accuracies": 0.625, "rewards/chosen": -0.14812934398651123, "rewards/margins": 0.6029497385025024, "rewards/rejected": -0.7510790228843689, "step": 1867 }, { "epoch": 0.22, "learning_rate": 2.390026922626712e-07, "logits/chosen": -3.8219552040100098, "logits/rejected": -3.4607245922088623, "logps/chosen": -182.2537384033203, "logps/rejected": -171.80551147460938, "loss": 0.2674, "rewards/accuracies": 1.0, "rewards/chosen": 0.05184371769428253, "rewards/margins": 1.4408807754516602, "rewards/rejected": -1.3890371322631836, "step": 1868 }, { "epoch": 0.22, "learning_rate": 2.3896757579304695e-07, "logits/chosen": -3.5720267295837402, "logits/rejected": -3.5305967330932617, "logps/chosen": -174.22036743164062, "logps/rejected": -220.58416748046875, "loss": 0.4028, "rewards/accuracies": 0.75, "rewards/chosen": 0.012285813689231873, "rewards/margins": 2.375335454940796, "rewards/rejected": -2.3630495071411133, "step": 1869 }, { "epoch": 0.22, "learning_rate": 2.3893245932342265e-07, "logits/chosen": -2.7672641277313232, "logits/rejected": -2.6977572441101074, "logps/chosen": -247.30990600585938, "logps/rejected": -171.23654174804688, "loss": 0.7358, "rewards/accuracies": 0.5, "rewards/chosen": -0.2699260711669922, "rewards/margins": 0.6120525598526001, "rewards/rejected": -0.8819786310195923, "step": 1870 }, { "epoch": 0.22, "learning_rate": 2.388973428537984e-07, "logits/chosen": -2.9081223011016846, "logits/rejected": -3.121767044067383, "logps/chosen": -215.24801635742188, "logps/rejected": -216.57276916503906, "loss": 0.4661, "rewards/accuracies": 0.625, "rewards/chosen": -0.6541638374328613, "rewards/margins": 1.0534257888793945, "rewards/rejected": -1.7075896263122559, "step": 1871 }, { "epoch": 0.22, "learning_rate": 2.3886222638417416e-07, "logits/chosen": -2.7080483436584473, "logits/rejected": -2.6535959243774414, "logps/chosen": -315.1611328125, "logps/rejected": -381.1353454589844, "loss": 0.6026, "rewards/accuracies": 0.75, "rewards/chosen": 0.0642850399017334, "rewards/margins": 0.6664404273033142, "rewards/rejected": -0.6021553874015808, "step": 1872 }, { "epoch": 0.22, "learning_rate": 2.388271099145499e-07, "logits/chosen": -3.3447628021240234, "logits/rejected": -3.316511392593384, "logps/chosen": -284.9142761230469, "logps/rejected": -236.10165405273438, "loss": 0.3703, "rewards/accuracies": 0.875, "rewards/chosen": -0.014459162950515747, "rewards/margins": 1.2012072801589966, "rewards/rejected": -1.21566641330719, "step": 1873 }, { "epoch": 0.22, "learning_rate": 2.3879199344492567e-07, "logits/chosen": -2.641317844390869, "logits/rejected": -2.820796489715576, "logps/chosen": -301.94207763671875, "logps/rejected": -274.16156005859375, "loss": 0.566, "rewards/accuracies": 0.625, "rewards/chosen": 0.12546077370643616, "rewards/margins": 0.6516165733337402, "rewards/rejected": -0.5261558294296265, "step": 1874 }, { "epoch": 0.22, "learning_rate": 2.3875687697530137e-07, "logits/chosen": -3.3212637901306152, "logits/rejected": -3.4066638946533203, "logps/chosen": -394.03936767578125, "logps/rejected": -448.79095458984375, "loss": 0.2709, "rewards/accuracies": 0.875, "rewards/chosen": 0.6851503849029541, "rewards/margins": 2.550360679626465, "rewards/rejected": -1.8652102947235107, "step": 1875 }, { "epoch": 0.22, "learning_rate": 2.387217605056772e-07, "logits/chosen": -2.2592625617980957, "logits/rejected": -2.318779706954956, "logps/chosen": -335.7944641113281, "logps/rejected": -385.7993469238281, "loss": 0.2751, "rewards/accuracies": 0.875, "rewards/chosen": 0.033325958997011185, "rewards/margins": 2.3046064376831055, "rewards/rejected": -2.271280527114868, "step": 1876 }, { "epoch": 0.22, "learning_rate": 2.3868664403605293e-07, "logits/chosen": -2.441523551940918, "logits/rejected": -2.626868724822998, "logps/chosen": -235.89488220214844, "logps/rejected": -311.6707458496094, "loss": 0.5161, "rewards/accuracies": 0.5, "rewards/chosen": 0.1649269014596939, "rewards/margins": 1.7457470893859863, "rewards/rejected": -1.5808199644088745, "step": 1877 }, { "epoch": 0.22, "learning_rate": 2.3865152756642863e-07, "logits/chosen": -3.144813060760498, "logits/rejected": -2.667576789855957, "logps/chosen": -385.2821044921875, "logps/rejected": -264.3667297363281, "loss": 0.4031, "rewards/accuracies": 1.0, "rewards/chosen": 0.23157432675361633, "rewards/margins": 0.8427044749259949, "rewards/rejected": -0.6111301183700562, "step": 1878 }, { "epoch": 0.22, "learning_rate": 2.386164110968044e-07, "logits/chosen": -2.72945499420166, "logits/rejected": -2.905499219894409, "logps/chosen": -180.20306396484375, "logps/rejected": -229.76187133789062, "loss": 0.2942, "rewards/accuracies": 0.875, "rewards/chosen": 0.23968681693077087, "rewards/margins": 2.631272792816162, "rewards/rejected": -2.3915860652923584, "step": 1879 }, { "epoch": 0.22, "learning_rate": 2.3858129462718014e-07, "logits/chosen": -3.453127384185791, "logits/rejected": -3.2610836029052734, "logps/chosen": -190.06707763671875, "logps/rejected": -156.92901611328125, "loss": 0.3814, "rewards/accuracies": 0.875, "rewards/chosen": -0.15209154784679413, "rewards/margins": 1.3634053468704224, "rewards/rejected": -1.515496850013733, "step": 1880 }, { "epoch": 0.22, "learning_rate": 2.385461781575559e-07, "logits/chosen": -3.198052406311035, "logits/rejected": -3.4438600540161133, "logps/chosen": -452.38909912109375, "logps/rejected": -444.28302001953125, "loss": 0.27, "rewards/accuracies": 1.0, "rewards/chosen": -0.0633179098367691, "rewards/margins": 1.9909108877182007, "rewards/rejected": -2.0542285442352295, "step": 1881 }, { "epoch": 0.22, "learning_rate": 2.3851106168793165e-07, "logits/chosen": -3.2948079109191895, "logits/rejected": -3.117093324661255, "logps/chosen": -148.17599487304688, "logps/rejected": -181.55645751953125, "loss": 0.5527, "rewards/accuracies": 0.875, "rewards/chosen": -0.8143954277038574, "rewards/margins": 0.5694471597671509, "rewards/rejected": -1.3838427066802979, "step": 1882 }, { "epoch": 0.22, "learning_rate": 2.3847594521830735e-07, "logits/chosen": -3.2947707176208496, "logits/rejected": -3.4383034706115723, "logps/chosen": -169.2095947265625, "logps/rejected": -207.70339965820312, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": -0.05324330925941467, "rewards/margins": 2.5381827354431152, "rewards/rejected": -2.591426134109497, "step": 1883 }, { "epoch": 0.22, "learning_rate": 2.384408287486831e-07, "logits/chosen": -3.062617301940918, "logits/rejected": -3.0640885829925537, "logps/chosen": -269.67547607421875, "logps/rejected": -336.88446044921875, "loss": 0.6612, "rewards/accuracies": 0.75, "rewards/chosen": -0.20418275892734528, "rewards/margins": 0.8373125791549683, "rewards/rejected": -1.0414953231811523, "step": 1884 }, { "epoch": 0.22, "learning_rate": 2.3840571227905888e-07, "logits/chosen": -3.3146984577178955, "logits/rejected": -2.659238815307617, "logps/chosen": -380.74871826171875, "logps/rejected": -359.3706970214844, "loss": 0.498, "rewards/accuracies": 0.75, "rewards/chosen": -0.5748739838600159, "rewards/margins": 1.3990894556045532, "rewards/rejected": -1.9739634990692139, "step": 1885 }, { "epoch": 0.22, "learning_rate": 2.383705958094346e-07, "logits/chosen": -2.795694351196289, "logits/rejected": -2.9148809909820557, "logps/chosen": -252.338134765625, "logps/rejected": -270.98797607421875, "loss": 0.5251, "rewards/accuracies": 0.75, "rewards/chosen": 0.33758652210235596, "rewards/margins": 1.7036025524139404, "rewards/rejected": -1.366016149520874, "step": 1886 }, { "epoch": 0.22, "learning_rate": 2.3833547933981036e-07, "logits/chosen": -3.4827117919921875, "logits/rejected": -3.2771108150482178, "logps/chosen": -343.9080810546875, "logps/rejected": -180.65945434570312, "loss": 0.3233, "rewards/accuracies": 0.875, "rewards/chosen": 0.2024131417274475, "rewards/margins": 1.1869056224822998, "rewards/rejected": -0.9844925403594971, "step": 1887 }, { "epoch": 0.22, "learning_rate": 2.3830036287018612e-07, "logits/chosen": -3.2256879806518555, "logits/rejected": -3.0551669597625732, "logps/chosen": -146.13330078125, "logps/rejected": -245.65476989746094, "loss": 0.288, "rewards/accuracies": 0.875, "rewards/chosen": -0.23634114861488342, "rewards/margins": 1.736095905303955, "rewards/rejected": -1.9724370241165161, "step": 1888 }, { "epoch": 0.22, "learning_rate": 2.3826524640056184e-07, "logits/chosen": -3.020913600921631, "logits/rejected": -3.0713706016540527, "logps/chosen": -292.6805114746094, "logps/rejected": -441.4288635253906, "loss": 0.4698, "rewards/accuracies": 0.75, "rewards/chosen": 0.19158652424812317, "rewards/margins": 1.174302339553833, "rewards/rejected": -0.9827158451080322, "step": 1889 }, { "epoch": 0.22, "learning_rate": 2.382301299309376e-07, "logits/chosen": -2.4694907665252686, "logits/rejected": -2.43359637260437, "logps/chosen": -76.58487701416016, "logps/rejected": -108.47946166992188, "loss": 0.8584, "rewards/accuracies": 0.375, "rewards/chosen": -0.349437415599823, "rewards/margins": -0.06080085039138794, "rewards/rejected": -0.28863656520843506, "step": 1890 }, { "epoch": 0.22, "learning_rate": 2.3819501346131332e-07, "logits/chosen": -2.677908182144165, "logits/rejected": -2.371974468231201, "logps/chosen": -165.31201171875, "logps/rejected": -229.9835205078125, "loss": 0.6309, "rewards/accuracies": 0.5, "rewards/chosen": -0.37851524353027344, "rewards/margins": 0.9979226589202881, "rewards/rejected": -1.3764379024505615, "step": 1891 }, { "epoch": 0.22, "learning_rate": 2.3815989699168908e-07, "logits/chosen": -2.7455406188964844, "logits/rejected": -2.7070841789245605, "logps/chosen": -345.7112121582031, "logps/rejected": -286.5813903808594, "loss": 0.6149, "rewards/accuracies": 0.625, "rewards/chosen": -0.625001072883606, "rewards/margins": 0.3239609897136688, "rewards/rejected": -0.9489621520042419, "step": 1892 }, { "epoch": 0.22, "learning_rate": 2.3812478052206486e-07, "logits/chosen": -3.1710972785949707, "logits/rejected": -3.1996312141418457, "logps/chosen": -128.76968383789062, "logps/rejected": -126.00115966796875, "loss": 0.3669, "rewards/accuracies": 0.75, "rewards/chosen": 0.1471884548664093, "rewards/margins": 1.7755229473114014, "rewards/rejected": -1.6283345222473145, "step": 1893 }, { "epoch": 0.22, "learning_rate": 2.3808966405244059e-07, "logits/chosen": -4.009587287902832, "logits/rejected": -3.9257664680480957, "logps/chosen": -401.2257385253906, "logps/rejected": -315.4661560058594, "loss": 0.166, "rewards/accuracies": 0.875, "rewards/chosen": 0.16800488531589508, "rewards/margins": 2.6292805671691895, "rewards/rejected": -2.461275815963745, "step": 1894 }, { "epoch": 0.22, "learning_rate": 2.3805454758281634e-07, "logits/chosen": -3.2193124294281006, "logits/rejected": -3.392913341522217, "logps/chosen": -174.849853515625, "logps/rejected": -255.85043334960938, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": 0.3333899676799774, "rewards/margins": 1.9591898918151855, "rewards/rejected": -1.6257997751235962, "step": 1895 }, { "epoch": 0.22, "learning_rate": 2.380194311131921e-07, "logits/chosen": -2.5379769802093506, "logits/rejected": -2.7388737201690674, "logps/chosen": -231.49888610839844, "logps/rejected": -238.67532348632812, "loss": 0.4557, "rewards/accuracies": 0.625, "rewards/chosen": -0.2929534912109375, "rewards/margins": 1.5845260620117188, "rewards/rejected": -1.8774795532226562, "step": 1896 }, { "epoch": 0.22, "learning_rate": 2.3798431464356782e-07, "logits/chosen": -2.861204147338867, "logits/rejected": -2.7918097972869873, "logps/chosen": -419.77752685546875, "logps/rejected": -140.5082244873047, "loss": 0.662, "rewards/accuracies": 0.5, "rewards/chosen": -0.13766422867774963, "rewards/margins": 0.7582541704177856, "rewards/rejected": -0.8959183692932129, "step": 1897 }, { "epoch": 0.22, "learning_rate": 2.3794919817394357e-07, "logits/chosen": -2.3190271854400635, "logits/rejected": -2.5565736293792725, "logps/chosen": -295.4584045410156, "logps/rejected": -282.631103515625, "loss": 0.3132, "rewards/accuracies": 0.875, "rewards/chosen": 0.23769791424274445, "rewards/margins": 2.139730215072632, "rewards/rejected": -1.9020321369171143, "step": 1898 }, { "epoch": 0.22, "learning_rate": 2.379140817043193e-07, "logits/chosen": -3.7230165004730225, "logits/rejected": -3.505910873413086, "logps/chosen": -247.37513732910156, "logps/rejected": -196.65969848632812, "loss": 0.2829, "rewards/accuracies": 0.875, "rewards/chosen": 0.17114269733428955, "rewards/margins": 1.8355562686920166, "rewards/rejected": -1.664413571357727, "step": 1899 }, { "epoch": 0.22, "learning_rate": 2.3787896523469506e-07, "logits/chosen": -2.6964643001556396, "logits/rejected": -2.463961124420166, "logps/chosen": -245.89938354492188, "logps/rejected": -452.70953369140625, "loss": 0.2413, "rewards/accuracies": 1.0, "rewards/chosen": 0.06895250082015991, "rewards/margins": 1.7071712017059326, "rewards/rejected": -1.638218641281128, "step": 1900 }, { "epoch": 0.22, "learning_rate": 2.378438487650708e-07, "logits/chosen": -3.1125733852386475, "logits/rejected": -3.0511443614959717, "logps/chosen": -207.16773986816406, "logps/rejected": -227.18804931640625, "loss": 0.583, "rewards/accuracies": 0.75, "rewards/chosen": -0.11963582038879395, "rewards/margins": 0.7430042624473572, "rewards/rejected": -0.8626401424407959, "step": 1901 }, { "epoch": 0.22, "learning_rate": 2.3780873229544654e-07, "logits/chosen": -3.546088933944702, "logits/rejected": -4.047410011291504, "logps/chosen": -177.18405151367188, "logps/rejected": -211.7225341796875, "loss": 0.3657, "rewards/accuracies": 0.875, "rewards/chosen": -0.09369050711393356, "rewards/margins": 1.209911823272705, "rewards/rejected": -1.3036023378372192, "step": 1902 }, { "epoch": 0.22, "learning_rate": 2.3777361582582232e-07, "logits/chosen": -2.426943778991699, "logits/rejected": -2.498091220855713, "logps/chosen": -248.98025512695312, "logps/rejected": -222.96487426757812, "loss": 0.3045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5036156177520752, "rewards/margins": 1.497896432876587, "rewards/rejected": -2.001512050628662, "step": 1903 }, { "epoch": 0.22, "learning_rate": 2.3773849935619802e-07, "logits/chosen": -2.845189094543457, "logits/rejected": -2.9526124000549316, "logps/chosen": -303.8992614746094, "logps/rejected": -225.97897338867188, "loss": 0.6146, "rewards/accuracies": 0.875, "rewards/chosen": -0.2985466718673706, "rewards/margins": 0.5058640241622925, "rewards/rejected": -0.8044107556343079, "step": 1904 }, { "epoch": 0.22, "learning_rate": 2.377033828865738e-07, "logits/chosen": -3.3253822326660156, "logits/rejected": -3.160639762878418, "logps/chosen": -332.3227844238281, "logps/rejected": -278.9083251953125, "loss": 0.5514, "rewards/accuracies": 0.75, "rewards/chosen": -0.1049327477812767, "rewards/margins": 1.0209028720855713, "rewards/rejected": -1.125835657119751, "step": 1905 }, { "epoch": 0.22, "learning_rate": 2.3766826641694955e-07, "logits/chosen": -2.6788642406463623, "logits/rejected": -2.6205129623413086, "logps/chosen": -305.1359558105469, "logps/rejected": -397.4089660644531, "loss": 0.7512, "rewards/accuracies": 0.875, "rewards/chosen": -0.0500718355178833, "rewards/margins": 0.1743241548538208, "rewards/rejected": -0.2243960052728653, "step": 1906 }, { "epoch": 0.22, "learning_rate": 2.3763314994732528e-07, "logits/chosen": -3.5826706886291504, "logits/rejected": -3.9390478134155273, "logps/chosen": -132.31370544433594, "logps/rejected": -204.89389038085938, "loss": 0.4297, "rewards/accuracies": 0.75, "rewards/chosen": -0.23746031522750854, "rewards/margins": 1.8942127227783203, "rewards/rejected": -2.1316730976104736, "step": 1907 }, { "epoch": 0.22, "learning_rate": 2.3759803347770103e-07, "logits/chosen": -3.439944267272949, "logits/rejected": -4.008194923400879, "logps/chosen": -208.41029357910156, "logps/rejected": -207.65365600585938, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": 0.008326005190610886, "rewards/margins": 1.9840483665466309, "rewards/rejected": -1.975722312927246, "step": 1908 }, { "epoch": 0.22, "learning_rate": 2.375629170080768e-07, "logits/chosen": -3.09859037399292, "logits/rejected": -3.2638890743255615, "logps/chosen": -169.08837890625, "logps/rejected": -285.9677734375, "loss": 0.4884, "rewards/accuracies": 0.875, "rewards/chosen": -0.1801491230726242, "rewards/margins": 0.6366292238235474, "rewards/rejected": -0.816778302192688, "step": 1909 }, { "epoch": 0.22, "learning_rate": 2.3752780053845252e-07, "logits/chosen": -2.9172348976135254, "logits/rejected": -2.6605277061462402, "logps/chosen": -485.97796630859375, "logps/rejected": -276.85235595703125, "loss": 0.3688, "rewards/accuracies": 0.75, "rewards/chosen": -0.034717708826065063, "rewards/margins": 1.3961790800094604, "rewards/rejected": -1.4308967590332031, "step": 1910 }, { "epoch": 0.22, "learning_rate": 2.3749268406882827e-07, "logits/chosen": -2.18872332572937, "logits/rejected": -2.3571431636810303, "logps/chosen": -353.88616943359375, "logps/rejected": -330.8863830566406, "loss": 0.2935, "rewards/accuracies": 0.875, "rewards/chosen": 0.15640822052955627, "rewards/margins": 1.881563425064087, "rewards/rejected": -1.7251551151275635, "step": 1911 }, { "epoch": 0.22, "learning_rate": 2.37457567599204e-07, "logits/chosen": -3.1446750164031982, "logits/rejected": -3.3271193504333496, "logps/chosen": -301.9336853027344, "logps/rejected": -335.8569030761719, "loss": 0.4629, "rewards/accuracies": 0.875, "rewards/chosen": 0.0881853848695755, "rewards/margins": 1.9179389476776123, "rewards/rejected": -1.8297535181045532, "step": 1912 }, { "epoch": 0.22, "learning_rate": 2.3742245112957975e-07, "logits/chosen": -2.59894061088562, "logits/rejected": -2.5261459350585938, "logps/chosen": -423.3135070800781, "logps/rejected": -260.0544128417969, "loss": 0.3484, "rewards/accuracies": 0.875, "rewards/chosen": 0.5532470941543579, "rewards/margins": 1.5248947143554688, "rewards/rejected": -0.9716475605964661, "step": 1913 }, { "epoch": 0.22, "learning_rate": 2.3738733465995553e-07, "logits/chosen": -2.0945417881011963, "logits/rejected": -2.2415428161621094, "logps/chosen": -309.9722900390625, "logps/rejected": -189.98486328125, "loss": 0.5224, "rewards/accuracies": 0.625, "rewards/chosen": 0.38147151470184326, "rewards/margins": 1.1462559700012207, "rewards/rejected": -0.7647844552993774, "step": 1914 }, { "epoch": 0.22, "learning_rate": 2.3735221819033123e-07, "logits/chosen": -2.961062431335449, "logits/rejected": -3.2115139961242676, "logps/chosen": -356.75830078125, "logps/rejected": -252.25650024414062, "loss": 0.4849, "rewards/accuracies": 0.625, "rewards/chosen": -0.06073129177093506, "rewards/margins": 1.453205943107605, "rewards/rejected": -1.51393723487854, "step": 1915 }, { "epoch": 0.22, "learning_rate": 2.37317101720707e-07, "logits/chosen": -2.6856603622436523, "logits/rejected": -2.725938320159912, "logps/chosen": -467.9189758300781, "logps/rejected": -457.5125732421875, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": 0.3994789719581604, "rewards/margins": 3.115227699279785, "rewards/rejected": -2.7157485485076904, "step": 1916 }, { "epoch": 0.22, "learning_rate": 2.3728198525108277e-07, "logits/chosen": -3.0873265266418457, "logits/rejected": -2.8918614387512207, "logps/chosen": -236.23187255859375, "logps/rejected": -99.74676513671875, "loss": 0.3865, "rewards/accuracies": 0.75, "rewards/chosen": -0.04841993749141693, "rewards/margins": 1.121311068534851, "rewards/rejected": -1.1697309017181396, "step": 1917 }, { "epoch": 0.22, "learning_rate": 2.372468687814585e-07, "logits/chosen": -3.5829591751098633, "logits/rejected": -3.4007558822631836, "logps/chosen": -338.7941589355469, "logps/rejected": -321.5729675292969, "loss": 0.4131, "rewards/accuracies": 0.75, "rewards/chosen": 0.11648986488580704, "rewards/margins": 1.7642592191696167, "rewards/rejected": -1.6477693319320679, "step": 1918 }, { "epoch": 0.22, "learning_rate": 2.3721175231183425e-07, "logits/chosen": -3.922886371612549, "logits/rejected": -4.103003025054932, "logps/chosen": -276.6469421386719, "logps/rejected": -270.7803955078125, "loss": 0.3732, "rewards/accuracies": 0.875, "rewards/chosen": 0.21608521044254303, "rewards/margins": 1.3308022022247314, "rewards/rejected": -1.11471688747406, "step": 1919 }, { "epoch": 0.22, "learning_rate": 2.3717663584220997e-07, "logits/chosen": -3.449526071548462, "logits/rejected": -3.163794755935669, "logps/chosen": -243.94845581054688, "logps/rejected": -262.3933410644531, "loss": 0.5065, "rewards/accuracies": 0.625, "rewards/chosen": -0.25724267959594727, "rewards/margins": 0.9045336246490479, "rewards/rejected": -1.1617763042449951, "step": 1920 }, { "epoch": 0.22, "learning_rate": 2.3714151937258573e-07, "logits/chosen": -2.6736531257629395, "logits/rejected": -2.7727906703948975, "logps/chosen": -289.97076416015625, "logps/rejected": -183.34121704101562, "loss": 0.4423, "rewards/accuracies": 0.875, "rewards/chosen": 0.09541648626327515, "rewards/margins": 0.896037757396698, "rewards/rejected": -0.8006212711334229, "step": 1921 }, { "epoch": 0.22, "learning_rate": 2.3710640290296148e-07, "logits/chosen": -3.780348300933838, "logits/rejected": -3.5795912742614746, "logps/chosen": -195.87997436523438, "logps/rejected": -211.07737731933594, "loss": 0.5017, "rewards/accuracies": 0.875, "rewards/chosen": -0.17038977146148682, "rewards/margins": 1.1311023235321045, "rewards/rejected": -1.3014919757843018, "step": 1922 }, { "epoch": 0.22, "learning_rate": 2.370712864333372e-07, "logits/chosen": -2.852374315261841, "logits/rejected": -2.8214497566223145, "logps/chosen": -117.99516296386719, "logps/rejected": -104.95326232910156, "loss": 0.3945, "rewards/accuracies": 1.0, "rewards/chosen": 0.3886273205280304, "rewards/margins": 0.9860265851020813, "rewards/rejected": -0.5973993539810181, "step": 1923 }, { "epoch": 0.22, "learning_rate": 2.3703616996371296e-07, "logits/chosen": -3.5518434047698975, "logits/rejected": -3.354944944381714, "logps/chosen": -204.48284912109375, "logps/rejected": -189.72694396972656, "loss": 0.4995, "rewards/accuracies": 0.75, "rewards/chosen": 0.07105147838592529, "rewards/margins": 0.5340943932533264, "rewards/rejected": -0.4630429148674011, "step": 1924 }, { "epoch": 0.22, "learning_rate": 2.3700105349408874e-07, "logits/chosen": -3.3995602130889893, "logits/rejected": -3.1734938621520996, "logps/chosen": -157.18045043945312, "logps/rejected": -223.75341796875, "loss": 0.4146, "rewards/accuracies": 0.625, "rewards/chosen": -0.19136002659797668, "rewards/margins": 0.9750504493713379, "rewards/rejected": -1.1664104461669922, "step": 1925 }, { "epoch": 0.22, "learning_rate": 2.3696593702446444e-07, "logits/chosen": -2.7661330699920654, "logits/rejected": -2.771693706512451, "logps/chosen": -200.76788330078125, "logps/rejected": -224.69912719726562, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": 0.6854684948921204, "rewards/margins": 2.594217538833618, "rewards/rejected": -1.9087492227554321, "step": 1926 }, { "epoch": 0.22, "learning_rate": 2.3693082055484022e-07, "logits/chosen": -3.077664852142334, "logits/rejected": -3.011803150177002, "logps/chosen": -394.0582275390625, "logps/rejected": -210.83856201171875, "loss": 0.5072, "rewards/accuracies": 0.75, "rewards/chosen": 0.14685095846652985, "rewards/margins": 1.029951572418213, "rewards/rejected": -0.8831006288528442, "step": 1927 }, { "epoch": 0.22, "learning_rate": 2.3689570408521595e-07, "logits/chosen": -3.0255439281463623, "logits/rejected": -2.714571475982666, "logps/chosen": -162.053955078125, "logps/rejected": -245.2805938720703, "loss": 0.2434, "rewards/accuracies": 1.0, "rewards/chosen": 0.05062887817621231, "rewards/margins": 1.7896405458450317, "rewards/rejected": -1.7390116453170776, "step": 1928 }, { "epoch": 0.22, "learning_rate": 2.368605876155917e-07, "logits/chosen": -2.939450263977051, "logits/rejected": -2.5983824729919434, "logps/chosen": -235.44837951660156, "logps/rejected": -317.5164794921875, "loss": 0.2184, "rewards/accuracies": 1.0, "rewards/chosen": 0.5185390114784241, "rewards/margins": 1.8647598028182983, "rewards/rejected": -1.3462207317352295, "step": 1929 }, { "epoch": 0.22, "learning_rate": 2.3682547114596746e-07, "logits/chosen": -3.1611690521240234, "logits/rejected": -3.0884628295898438, "logps/chosen": -303.3222961425781, "logps/rejected": -238.7315216064453, "loss": 0.222, "rewards/accuracies": 1.0, "rewards/chosen": 0.43077534437179565, "rewards/margins": 1.7002692222595215, "rewards/rejected": -1.269493818283081, "step": 1930 }, { "epoch": 0.22, "learning_rate": 2.367903546763432e-07, "logits/chosen": -3.4758362770080566, "logits/rejected": -3.3406496047973633, "logps/chosen": -202.26531982421875, "logps/rejected": -218.9394989013672, "loss": 0.4252, "rewards/accuracies": 0.75, "rewards/chosen": 0.17453855276107788, "rewards/margins": 1.8564305305480957, "rewards/rejected": -1.681891918182373, "step": 1931 }, { "epoch": 0.22, "learning_rate": 2.3675523820671894e-07, "logits/chosen": -3.1129913330078125, "logits/rejected": -3.1399240493774414, "logps/chosen": -309.18353271484375, "logps/rejected": -272.7609558105469, "loss": 0.4118, "rewards/accuracies": 0.75, "rewards/chosen": -0.027665115892887115, "rewards/margins": 1.2992477416992188, "rewards/rejected": -1.3269128799438477, "step": 1932 }, { "epoch": 0.22, "learning_rate": 2.367201217370947e-07, "logits/chosen": -2.9982151985168457, "logits/rejected": -2.9184248447418213, "logps/chosen": -246.80738830566406, "logps/rejected": -261.47821044921875, "loss": 0.3356, "rewards/accuracies": 0.875, "rewards/chosen": 0.37736326456069946, "rewards/margins": 1.5595622062683105, "rewards/rejected": -1.1821987628936768, "step": 1933 }, { "epoch": 0.22, "learning_rate": 2.3668500526747042e-07, "logits/chosen": -3.3420968055725098, "logits/rejected": -3.270202875137329, "logps/chosen": -297.59075927734375, "logps/rejected": -332.28948974609375, "loss": 0.2877, "rewards/accuracies": 0.875, "rewards/chosen": -0.03211592137813568, "rewards/margins": 2.0577778816223145, "rewards/rejected": -2.0898938179016113, "step": 1934 }, { "epoch": 0.22, "learning_rate": 2.3664988879784618e-07, "logits/chosen": -3.3796701431274414, "logits/rejected": -3.6025915145874023, "logps/chosen": -240.79527282714844, "logps/rejected": -204.16064453125, "loss": 0.4075, "rewards/accuracies": 0.75, "rewards/chosen": 0.0855565071105957, "rewards/margins": 1.2519049644470215, "rewards/rejected": -1.1663485765457153, "step": 1935 }, { "epoch": 0.22, "learning_rate": 2.366147723282219e-07, "logits/chosen": -1.9516831636428833, "logits/rejected": -2.189332962036133, "logps/chosen": -359.30133056640625, "logps/rejected": -249.66867065429688, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": 0.4304964244365692, "rewards/margins": 1.93541419506073, "rewards/rejected": -1.504917860031128, "step": 1936 }, { "epoch": 0.22, "learning_rate": 2.3657965585859768e-07, "logits/chosen": -2.6603569984436035, "logits/rejected": -2.864053249359131, "logps/chosen": -226.45791625976562, "logps/rejected": -138.53054809570312, "loss": 0.739, "rewards/accuracies": 0.625, "rewards/chosen": -0.536213219165802, "rewards/margins": 0.4281558394432068, "rewards/rejected": -0.9643691182136536, "step": 1937 }, { "epoch": 0.22, "learning_rate": 2.3654453938897344e-07, "logits/chosen": -3.158496141433716, "logits/rejected": -3.446389675140381, "logps/chosen": -281.42120361328125, "logps/rejected": -313.11962890625, "loss": 0.1598, "rewards/accuracies": 0.875, "rewards/chosen": -0.04978714510798454, "rewards/margins": 3.8393454551696777, "rewards/rejected": -3.8891327381134033, "step": 1938 }, { "epoch": 0.22, "learning_rate": 2.3650942291934917e-07, "logits/chosen": -3.6872968673706055, "logits/rejected": -3.6790711879730225, "logps/chosen": -120.06697082519531, "logps/rejected": -165.63514709472656, "loss": 0.2851, "rewards/accuracies": 0.875, "rewards/chosen": 0.3381662666797638, "rewards/margins": 2.2810964584350586, "rewards/rejected": -1.942929983139038, "step": 1939 }, { "epoch": 0.22, "learning_rate": 2.3647430644972492e-07, "logits/chosen": -3.797347068786621, "logits/rejected": -3.5110065937042236, "logps/chosen": -228.1983642578125, "logps/rejected": -155.682373046875, "loss": 0.6128, "rewards/accuracies": 0.625, "rewards/chosen": -0.19041664898395538, "rewards/margins": 0.8307810425758362, "rewards/rejected": -1.02119779586792, "step": 1940 }, { "epoch": 0.22, "learning_rate": 2.3643918998010067e-07, "logits/chosen": -3.910299777984619, "logits/rejected": -4.0295305252075195, "logps/chosen": -161.05770874023438, "logps/rejected": -221.011474609375, "loss": 0.4901, "rewards/accuracies": 0.625, "rewards/chosen": -0.009858936071395874, "rewards/margins": 1.878171682357788, "rewards/rejected": -1.8880305290222168, "step": 1941 }, { "epoch": 0.22, "learning_rate": 2.364040735104764e-07, "logits/chosen": -2.938612937927246, "logits/rejected": -3.0558581352233887, "logps/chosen": -347.83355712890625, "logps/rejected": -206.13336181640625, "loss": 0.7616, "rewards/accuracies": 0.5, "rewards/chosen": -0.5964158773422241, "rewards/margins": 0.07491900026798248, "rewards/rejected": -0.6713349223136902, "step": 1942 }, { "epoch": 0.22, "learning_rate": 2.3636895704085215e-07, "logits/chosen": -3.327674150466919, "logits/rejected": -3.1382312774658203, "logps/chosen": -262.4370422363281, "logps/rejected": -285.7188720703125, "loss": 0.3137, "rewards/accuracies": 0.875, "rewards/chosen": 0.32218700647354126, "rewards/margins": 1.9246999025344849, "rewards/rejected": -1.6025128364562988, "step": 1943 }, { "epoch": 0.22, "learning_rate": 2.3633384057122788e-07, "logits/chosen": -3.5266926288604736, "logits/rejected": -3.4114933013916016, "logps/chosen": -461.6474609375, "logps/rejected": -384.744384765625, "loss": 0.4509, "rewards/accuracies": 0.875, "rewards/chosen": -0.6036498546600342, "rewards/margins": 1.4447903633117676, "rewards/rejected": -2.048440456390381, "step": 1944 }, { "epoch": 0.22, "learning_rate": 2.3629872410160364e-07, "logits/chosen": -4.050859451293945, "logits/rejected": -4.083256244659424, "logps/chosen": -223.20828247070312, "logps/rejected": -262.51312255859375, "loss": 0.4942, "rewards/accuracies": 0.75, "rewards/chosen": -0.18191909790039062, "rewards/margins": 1.1304326057434082, "rewards/rejected": -1.3123517036437988, "step": 1945 }, { "epoch": 0.22, "learning_rate": 2.362636076319794e-07, "logits/chosen": -3.7139179706573486, "logits/rejected": -4.133528709411621, "logps/chosen": -205.20828247070312, "logps/rejected": -310.1703186035156, "loss": 0.1853, "rewards/accuracies": 0.875, "rewards/chosen": 0.5596264600753784, "rewards/margins": 3.109354019165039, "rewards/rejected": -2.549727439880371, "step": 1946 }, { "epoch": 0.22, "learning_rate": 2.3622849116235512e-07, "logits/chosen": -3.1618762016296387, "logits/rejected": -3.259639024734497, "logps/chosen": -203.43765258789062, "logps/rejected": -220.77877807617188, "loss": 0.9563, "rewards/accuracies": 0.75, "rewards/chosen": -0.5748702883720398, "rewards/margins": 0.8919491767883301, "rewards/rejected": -1.466819405555725, "step": 1947 }, { "epoch": 0.22, "learning_rate": 2.361933746927309e-07, "logits/chosen": -3.091965675354004, "logits/rejected": -2.996055841445923, "logps/chosen": -333.53045654296875, "logps/rejected": -257.8176574707031, "loss": 0.4503, "rewards/accuracies": 0.75, "rewards/chosen": -0.012640029191970825, "rewards/margins": 0.912082850933075, "rewards/rejected": -0.9247229099273682, "step": 1948 }, { "epoch": 0.22, "learning_rate": 2.361582582231066e-07, "logits/chosen": -3.3650078773498535, "logits/rejected": -3.174588680267334, "logps/chosen": -226.8917694091797, "logps/rejected": -103.08650207519531, "loss": 0.7467, "rewards/accuracies": 0.5, "rewards/chosen": -0.2040969431400299, "rewards/margins": 0.2637457847595215, "rewards/rejected": -0.467842698097229, "step": 1949 }, { "epoch": 0.22, "learning_rate": 2.3612314175348238e-07, "logits/chosen": -2.694668769836426, "logits/rejected": -2.980736255645752, "logps/chosen": -249.96926879882812, "logps/rejected": -200.3289337158203, "loss": 0.6669, "rewards/accuracies": 0.625, "rewards/chosen": -0.9649742245674133, "rewards/margins": 0.18468379974365234, "rewards/rejected": -1.1496580839157104, "step": 1950 }, { "epoch": 0.22, "learning_rate": 2.3608802528385813e-07, "logits/chosen": -3.2263569831848145, "logits/rejected": -3.305718421936035, "logps/chosen": -168.17361450195312, "logps/rejected": -147.99964904785156, "loss": 0.6989, "rewards/accuracies": 0.625, "rewards/chosen": -0.897217333316803, "rewards/margins": 0.7248235940933228, "rewards/rejected": -1.6220409870147705, "step": 1951 }, { "epoch": 0.23, "learning_rate": 2.3605290881423386e-07, "logits/chosen": -3.6969847679138184, "logits/rejected": -3.545210123062134, "logps/chosen": -252.175048828125, "logps/rejected": -289.9844055175781, "loss": 0.4321, "rewards/accuracies": 0.625, "rewards/chosen": -0.15360704064369202, "rewards/margins": 1.4460314512252808, "rewards/rejected": -1.5996384620666504, "step": 1952 }, { "epoch": 0.23, "learning_rate": 2.3601779234460961e-07, "logits/chosen": -3.2407126426696777, "logits/rejected": -2.8367509841918945, "logps/chosen": -274.98681640625, "logps/rejected": -207.9128875732422, "loss": 0.4495, "rewards/accuracies": 0.75, "rewards/chosen": -0.31745845079421997, "rewards/margins": 0.7776055932044983, "rewards/rejected": -1.0950640439987183, "step": 1953 }, { "epoch": 0.23, "learning_rate": 2.3598267587498537e-07, "logits/chosen": -2.8842508792877197, "logits/rejected": -2.9146432876586914, "logps/chosen": -306.57452392578125, "logps/rejected": -245.10397338867188, "loss": 0.7073, "rewards/accuracies": 0.625, "rewards/chosen": -0.70942223072052, "rewards/margins": 0.39295345544815063, "rewards/rejected": -1.1023757457733154, "step": 1954 }, { "epoch": 0.23, "learning_rate": 2.359475594053611e-07, "logits/chosen": -2.928433418273926, "logits/rejected": -2.936575412750244, "logps/chosen": -320.8022155761719, "logps/rejected": -266.6853332519531, "loss": 0.327, "rewards/accuracies": 0.875, "rewards/chosen": -0.06687840819358826, "rewards/margins": 1.47237229347229, "rewards/rejected": -1.5392507314682007, "step": 1955 }, { "epoch": 0.23, "learning_rate": 2.3591244293573685e-07, "logits/chosen": -3.7143688201904297, "logits/rejected": -3.4185824394226074, "logps/chosen": -234.03256225585938, "logps/rejected": -261.76666259765625, "loss": 0.7314, "rewards/accuracies": 0.5, "rewards/chosen": -0.666238009929657, "rewards/margins": 0.875117301940918, "rewards/rejected": -1.5413552522659302, "step": 1956 }, { "epoch": 0.23, "learning_rate": 2.3587732646611258e-07, "logits/chosen": -2.9856066703796387, "logits/rejected": -3.025017499923706, "logps/chosen": -486.88458251953125, "logps/rejected": -332.754150390625, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": 0.5096101760864258, "rewards/margins": 2.5398807525634766, "rewards/rejected": -2.030270576477051, "step": 1957 }, { "epoch": 0.23, "learning_rate": 2.3584220999648833e-07, "logits/chosen": -3.129814624786377, "logits/rejected": -3.169912815093994, "logps/chosen": -460.1618347167969, "logps/rejected": -255.0945281982422, "loss": 0.346, "rewards/accuracies": 1.0, "rewards/chosen": 0.3262975513935089, "rewards/margins": 1.0936552286148071, "rewards/rejected": -0.767357587814331, "step": 1958 }, { "epoch": 0.23, "learning_rate": 2.358070935268641e-07, "logits/chosen": -3.6455092430114746, "logits/rejected": -3.701657295227051, "logps/chosen": -234.68646240234375, "logps/rejected": -222.25436401367188, "loss": 0.7702, "rewards/accuracies": 0.625, "rewards/chosen": 0.06427282094955444, "rewards/margins": 0.5861705541610718, "rewards/rejected": -0.5218977928161621, "step": 1959 }, { "epoch": 0.23, "learning_rate": 2.357719770572398e-07, "logits/chosen": -2.662120819091797, "logits/rejected": -3.1801023483276367, "logps/chosen": -308.42352294921875, "logps/rejected": -174.07528686523438, "loss": 0.4272, "rewards/accuracies": 0.625, "rewards/chosen": 0.12972846627235413, "rewards/margins": 1.031456708908081, "rewards/rejected": -0.9017282724380493, "step": 1960 }, { "epoch": 0.23, "learning_rate": 2.357368605876156e-07, "logits/chosen": -3.6806488037109375, "logits/rejected": -3.452808380126953, "logps/chosen": -179.83517456054688, "logps/rejected": -155.84814453125, "loss": 0.3821, "rewards/accuracies": 0.875, "rewards/chosen": -0.04810775816440582, "rewards/margins": 0.8990609645843506, "rewards/rejected": -0.9471687078475952, "step": 1961 }, { "epoch": 0.23, "learning_rate": 2.3570174411799135e-07, "logits/chosen": -3.4087164402008057, "logits/rejected": -3.025362730026245, "logps/chosen": -273.7395324707031, "logps/rejected": -211.30308532714844, "loss": 0.712, "rewards/accuracies": 0.5, "rewards/chosen": -0.5862331986427307, "rewards/margins": 0.4231279492378235, "rewards/rejected": -1.0093611478805542, "step": 1962 }, { "epoch": 0.23, "learning_rate": 2.3566662764836707e-07, "logits/chosen": -3.027560234069824, "logits/rejected": -3.2989187240600586, "logps/chosen": -189.6392822265625, "logps/rejected": -178.92807006835938, "loss": 0.4945, "rewards/accuracies": 0.625, "rewards/chosen": 0.1672583371400833, "rewards/margins": 1.1380326747894287, "rewards/rejected": -0.9707743525505066, "step": 1963 }, { "epoch": 0.23, "learning_rate": 2.3563151117874283e-07, "logits/chosen": -2.862318992614746, "logits/rejected": -2.620781898498535, "logps/chosen": -412.0824890136719, "logps/rejected": -407.7791748046875, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": -0.15770098567008972, "rewards/margins": 1.1947884559631348, "rewards/rejected": -1.3524894714355469, "step": 1964 }, { "epoch": 0.23, "learning_rate": 2.3559639470911855e-07, "logits/chosen": -3.1342275142669678, "logits/rejected": -3.2022297382354736, "logps/chosen": -250.08865356445312, "logps/rejected": -252.90341186523438, "loss": 0.4769, "rewards/accuracies": 0.75, "rewards/chosen": -0.32614508271217346, "rewards/margins": 0.6992385983467102, "rewards/rejected": -1.025383710861206, "step": 1965 }, { "epoch": 0.23, "learning_rate": 2.355612782394943e-07, "logits/chosen": -3.786177158355713, "logits/rejected": -3.3957486152648926, "logps/chosen": -404.34014892578125, "logps/rejected": -175.4032440185547, "loss": 0.5511, "rewards/accuracies": 0.625, "rewards/chosen": -0.08581207692623138, "rewards/margins": 1.0726323127746582, "rewards/rejected": -1.1584444046020508, "step": 1966 }, { "epoch": 0.23, "learning_rate": 2.3552616176987006e-07, "logits/chosen": -3.10373854637146, "logits/rejected": -3.129697799682617, "logps/chosen": -182.16091918945312, "logps/rejected": -181.70484924316406, "loss": 0.6101, "rewards/accuracies": 0.625, "rewards/chosen": -0.28600138425827026, "rewards/margins": 0.9202946424484253, "rewards/rejected": -1.2062960863113403, "step": 1967 }, { "epoch": 0.23, "learning_rate": 2.354910453002458e-07, "logits/chosen": -2.781548500061035, "logits/rejected": -2.763784408569336, "logps/chosen": -120.17839050292969, "logps/rejected": -209.00851440429688, "loss": 0.304, "rewards/accuracies": 0.75, "rewards/chosen": 0.1158633828163147, "rewards/margins": 1.9739372730255127, "rewards/rejected": -1.8580738306045532, "step": 1968 }, { "epoch": 0.23, "learning_rate": 2.3545592883062154e-07, "logits/chosen": -3.1833155155181885, "logits/rejected": -2.836817741394043, "logps/chosen": -197.14303588867188, "logps/rejected": -160.02499389648438, "loss": 0.75, "rewards/accuracies": 0.5, "rewards/chosen": -0.33621856570243835, "rewards/margins": 0.18338410556316376, "rewards/rejected": -0.5196026563644409, "step": 1969 }, { "epoch": 0.23, "learning_rate": 2.3542081236099732e-07, "logits/chosen": -2.8824148178100586, "logits/rejected": -2.9640679359436035, "logps/chosen": -247.38136291503906, "logps/rejected": -211.27462768554688, "loss": 0.4154, "rewards/accuracies": 0.75, "rewards/chosen": 0.17264491319656372, "rewards/margins": 1.2930083274841309, "rewards/rejected": -1.1203633546829224, "step": 1970 }, { "epoch": 0.23, "learning_rate": 2.3538569589137305e-07, "logits/chosen": -2.7850444316864014, "logits/rejected": -3.2077322006225586, "logps/chosen": -280.5623779296875, "logps/rejected": -247.1150665283203, "loss": 0.4595, "rewards/accuracies": 0.75, "rewards/chosen": -0.013271048665046692, "rewards/margins": 1.8012468814849854, "rewards/rejected": -1.814517855644226, "step": 1971 }, { "epoch": 0.23, "learning_rate": 2.353505794217488e-07, "logits/chosen": -2.7597410678863525, "logits/rejected": -2.4582085609436035, "logps/chosen": -498.0703125, "logps/rejected": -239.3566131591797, "loss": 0.4264, "rewards/accuracies": 0.625, "rewards/chosen": -0.12834206223487854, "rewards/margins": 1.2244586944580078, "rewards/rejected": -1.3528008460998535, "step": 1972 }, { "epoch": 0.23, "learning_rate": 2.3531546295212453e-07, "logits/chosen": -3.6037542819976807, "logits/rejected": -3.8507797718048096, "logps/chosen": -230.17218017578125, "logps/rejected": -518.9234619140625, "loss": 0.2009, "rewards/accuracies": 1.0, "rewards/chosen": 0.1032576709985733, "rewards/margins": 2.7321343421936035, "rewards/rejected": -2.6288766860961914, "step": 1973 }, { "epoch": 0.23, "learning_rate": 2.3528034648250029e-07, "logits/chosen": -3.1235060691833496, "logits/rejected": -3.231555938720703, "logps/chosen": -261.3963317871094, "logps/rejected": -269.98565673828125, "loss": 0.5174, "rewards/accuracies": 0.625, "rewards/chosen": 0.008741632103919983, "rewards/margins": 1.5905370712280273, "rewards/rejected": -1.5817954540252686, "step": 1974 }, { "epoch": 0.23, "learning_rate": 2.3524523001287604e-07, "logits/chosen": -2.926250696182251, "logits/rejected": -2.9241366386413574, "logps/chosen": -326.24908447265625, "logps/rejected": -220.0181427001953, "loss": 0.5579, "rewards/accuracies": 0.625, "rewards/chosen": -0.09044589102268219, "rewards/margins": 0.535925030708313, "rewards/rejected": -0.6263708472251892, "step": 1975 }, { "epoch": 0.23, "learning_rate": 2.3521011354325177e-07, "logits/chosen": -3.4068362712860107, "logits/rejected": -3.146085262298584, "logps/chosen": -177.4674530029297, "logps/rejected": -161.8980712890625, "loss": 0.3209, "rewards/accuracies": 1.0, "rewards/chosen": 0.3288155496120453, "rewards/margins": 1.8098738193511963, "rewards/rejected": -1.4810583591461182, "step": 1976 }, { "epoch": 0.23, "learning_rate": 2.3517499707362752e-07, "logits/chosen": -3.3953781127929688, "logits/rejected": -3.2584385871887207, "logps/chosen": -196.67868041992188, "logps/rejected": -189.63523864746094, "loss": 0.3843, "rewards/accuracies": 0.75, "rewards/chosen": -0.46858957409858704, "rewards/margins": 1.648219108581543, "rewards/rejected": -2.1168088912963867, "step": 1977 }, { "epoch": 0.23, "learning_rate": 2.3513988060400327e-07, "logits/chosen": -2.5541305541992188, "logits/rejected": -2.795699119567871, "logps/chosen": -381.5532531738281, "logps/rejected": -178.51487731933594, "loss": 0.7529, "rewards/accuracies": 0.875, "rewards/chosen": -0.10719103366136551, "rewards/margins": 0.40942907333374023, "rewards/rejected": -0.5166201591491699, "step": 1978 }, { "epoch": 0.23, "learning_rate": 2.35104764134379e-07, "logits/chosen": -3.1287455558776855, "logits/rejected": -3.098891258239746, "logps/chosen": -291.9648742675781, "logps/rejected": -223.3717041015625, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": 0.4046216309070587, "rewards/margins": 1.9311535358428955, "rewards/rejected": -1.5265318155288696, "step": 1979 }, { "epoch": 0.23, "learning_rate": 2.3506964766475476e-07, "logits/chosen": -3.5724217891693115, "logits/rejected": -3.437025308609009, "logps/chosen": -266.0279541015625, "logps/rejected": -211.51556396484375, "loss": 0.4137, "rewards/accuracies": 0.875, "rewards/chosen": -0.21183404326438904, "rewards/margins": 1.0518218278884888, "rewards/rejected": -1.2636559009552002, "step": 1980 }, { "epoch": 0.23, "learning_rate": 2.3503453119513048e-07, "logits/chosen": -2.911797046661377, "logits/rejected": -2.946897268295288, "logps/chosen": -208.14990234375, "logps/rejected": -205.21763610839844, "loss": 0.8525, "rewards/accuracies": 0.625, "rewards/chosen": -0.8678486347198486, "rewards/margins": 0.11905436217784882, "rewards/rejected": -0.9869030117988586, "step": 1981 }, { "epoch": 0.23, "learning_rate": 2.3499941472550626e-07, "logits/chosen": -2.9123899936676025, "logits/rejected": -2.8558435440063477, "logps/chosen": -288.6016845703125, "logps/rejected": -322.5012512207031, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.2832695543766022, "rewards/margins": 1.048529863357544, "rewards/rejected": -1.3317995071411133, "step": 1982 }, { "epoch": 0.23, "learning_rate": 2.3496429825588202e-07, "logits/chosen": -3.104579448699951, "logits/rejected": -3.136007785797119, "logps/chosen": -197.6335906982422, "logps/rejected": -290.9420166015625, "loss": 0.5118, "rewards/accuracies": 0.75, "rewards/chosen": -0.16035132110118866, "rewards/margins": 0.9162065982818604, "rewards/rejected": -1.076557993888855, "step": 1983 }, { "epoch": 0.23, "learning_rate": 2.3492918178625774e-07, "logits/chosen": -2.4632530212402344, "logits/rejected": -2.5979862213134766, "logps/chosen": -137.07080078125, "logps/rejected": -156.28280639648438, "loss": 0.3892, "rewards/accuracies": 0.625, "rewards/chosen": -0.2150254100561142, "rewards/margins": 1.5731987953186035, "rewards/rejected": -1.7882241010665894, "step": 1984 }, { "epoch": 0.23, "learning_rate": 2.348940653166335e-07, "logits/chosen": -3.679448127746582, "logits/rejected": -3.8115761280059814, "logps/chosen": -146.7847900390625, "logps/rejected": -139.30975341796875, "loss": 0.472, "rewards/accuracies": 0.75, "rewards/chosen": 0.10033592581748962, "rewards/margins": 1.1884117126464844, "rewards/rejected": -1.0880756378173828, "step": 1985 }, { "epoch": 0.23, "learning_rate": 2.3485894884700925e-07, "logits/chosen": -3.388212203979492, "logits/rejected": -3.599100351333618, "logps/chosen": -153.46485900878906, "logps/rejected": -210.74172973632812, "loss": 0.3317, "rewards/accuracies": 0.75, "rewards/chosen": 0.00030625052750110626, "rewards/margins": 1.9318147897720337, "rewards/rejected": -1.9315085411071777, "step": 1986 }, { "epoch": 0.23, "learning_rate": 2.3482383237738498e-07, "logits/chosen": -2.721313953399658, "logits/rejected": -2.9026601314544678, "logps/chosen": -347.9246826171875, "logps/rejected": -385.1693115234375, "loss": 0.4992, "rewards/accuracies": 0.625, "rewards/chosen": 0.07772264629602432, "rewards/margins": 1.1386456489562988, "rewards/rejected": -1.0609229803085327, "step": 1987 }, { "epoch": 0.23, "learning_rate": 2.3478871590776073e-07, "logits/chosen": -3.024780035018921, "logits/rejected": -3.2264184951782227, "logps/chosen": -245.78680419921875, "logps/rejected": -172.29356384277344, "loss": 0.4822, "rewards/accuracies": 0.625, "rewards/chosen": -0.01904572919011116, "rewards/margins": 0.9324353337287903, "rewards/rejected": -0.9514811635017395, "step": 1988 }, { "epoch": 0.23, "learning_rate": 2.3475359943813646e-07, "logits/chosen": -3.036740779876709, "logits/rejected": -2.59208345413208, "logps/chosen": -267.9580993652344, "logps/rejected": -239.78988647460938, "loss": 0.5305, "rewards/accuracies": 0.75, "rewards/chosen": -0.5468452572822571, "rewards/margins": 0.8903722763061523, "rewards/rejected": -1.4372174739837646, "step": 1989 }, { "epoch": 0.23, "learning_rate": 2.3471848296851221e-07, "logits/chosen": -2.996565818786621, "logits/rejected": -2.908280849456787, "logps/chosen": -145.6241455078125, "logps/rejected": -158.10855102539062, "loss": 0.5997, "rewards/accuracies": 0.625, "rewards/chosen": -0.5281105041503906, "rewards/margins": 0.5710313320159912, "rewards/rejected": -1.0991419553756714, "step": 1990 }, { "epoch": 0.23, "learning_rate": 2.3468336649888797e-07, "logits/chosen": -3.249218702316284, "logits/rejected": -3.673222541809082, "logps/chosen": -210.15866088867188, "logps/rejected": -336.4881286621094, "loss": 0.3395, "rewards/accuracies": 0.875, "rewards/chosen": -0.11744435131549835, "rewards/margins": 3.1911227703094482, "rewards/rejected": -3.3085670471191406, "step": 1991 }, { "epoch": 0.23, "learning_rate": 2.346482500292637e-07, "logits/chosen": -2.7963690757751465, "logits/rejected": -3.1021127700805664, "logps/chosen": -271.590087890625, "logps/rejected": -234.71060180664062, "loss": 0.4475, "rewards/accuracies": 0.875, "rewards/chosen": 0.1693440079689026, "rewards/margins": 2.1083009243011475, "rewards/rejected": -1.9389569759368896, "step": 1992 }, { "epoch": 0.23, "learning_rate": 2.3461313355963948e-07, "logits/chosen": -2.612342357635498, "logits/rejected": -2.4271016120910645, "logps/chosen": -414.65728759765625, "logps/rejected": -272.4967956542969, "loss": 0.6568, "rewards/accuracies": 0.625, "rewards/chosen": -0.8659661412239075, "rewards/margins": 0.27033287286758423, "rewards/rejected": -1.1362990140914917, "step": 1993 }, { "epoch": 0.23, "learning_rate": 2.3457801709001518e-07, "logits/chosen": -3.2016282081604004, "logits/rejected": -3.3491063117980957, "logps/chosen": -133.52964782714844, "logps/rejected": -174.03402709960938, "loss": 0.4785, "rewards/accuracies": 0.625, "rewards/chosen": 0.12429818511009216, "rewards/margins": 1.8087689876556396, "rewards/rejected": -1.6844708919525146, "step": 1994 }, { "epoch": 0.23, "learning_rate": 2.3454290062039096e-07, "logits/chosen": -3.4598541259765625, "logits/rejected": -3.361279010772705, "logps/chosen": -271.35675048828125, "logps/rejected": -277.79791259765625, "loss": 0.707, "rewards/accuracies": 0.625, "rewards/chosen": -0.6304394006729126, "rewards/margins": 1.3628253936767578, "rewards/rejected": -1.9932647943496704, "step": 1995 }, { "epoch": 0.23, "learning_rate": 2.345077841507667e-07, "logits/chosen": -2.536715030670166, "logits/rejected": -2.762017250061035, "logps/chosen": -190.5369873046875, "logps/rejected": -277.1086730957031, "loss": 0.3889, "rewards/accuracies": 0.875, "rewards/chosen": 0.3141373097896576, "rewards/margins": 1.5980970859527588, "rewards/rejected": -1.2839598655700684, "step": 1996 }, { "epoch": 0.23, "learning_rate": 2.3447266768114244e-07, "logits/chosen": -3.0305919647216797, "logits/rejected": -2.9997315406799316, "logps/chosen": -472.67926025390625, "logps/rejected": -311.43670654296875, "loss": 0.4757, "rewards/accuracies": 0.875, "rewards/chosen": 0.08996890485286713, "rewards/margins": 1.4198189973831177, "rewards/rejected": -1.3298500776290894, "step": 1997 }, { "epoch": 0.23, "learning_rate": 2.344375512115182e-07, "logits/chosen": -3.108970880508423, "logits/rejected": -3.1756434440612793, "logps/chosen": -256.4834899902344, "logps/rejected": -341.9066162109375, "loss": 0.513, "rewards/accuracies": 0.625, "rewards/chosen": -0.25651776790618896, "rewards/margins": 1.23410964012146, "rewards/rejected": -1.4906272888183594, "step": 1998 }, { "epoch": 0.23, "learning_rate": 2.3440243474189395e-07, "logits/chosen": -2.774517297744751, "logits/rejected": -2.6736412048339844, "logps/chosen": -266.9547424316406, "logps/rejected": -213.7591552734375, "loss": 0.5036, "rewards/accuracies": 0.625, "rewards/chosen": -0.02348414435982704, "rewards/margins": 1.35770845413208, "rewards/rejected": -1.3811925649642944, "step": 1999 }, { "epoch": 0.23, "learning_rate": 2.3436731827226967e-07, "logits/chosen": -3.4116063117980957, "logits/rejected": -2.9712471961975098, "logps/chosen": -211.96937561035156, "logps/rejected": -190.48724365234375, "loss": 1.03, "rewards/accuracies": 0.5, "rewards/chosen": -0.45111793279647827, "rewards/margins": 0.2641064524650574, "rewards/rejected": -0.7152243256568909, "step": 2000 }, { "epoch": 0.23, "eval_logits/chosen": -2.84497332572937, "eval_logits/rejected": -2.8033573627471924, "eval_logps/chosen": -293.25732421875, "eval_logps/rejected": -233.89837646484375, "eval_loss": 0.45588183403015137, "eval_rewards/accuracies": 0.7571428418159485, "eval_rewards/chosen": 0.07976274937391281, "eval_rewards/margins": 1.035339117050171, "eval_rewards/rejected": -0.9555763006210327, "eval_runtime": 32.7912, "eval_samples_per_second": 2.135, "eval_steps_per_second": 1.067, "step": 2000 }, { "epoch": 0.23, "learning_rate": 2.3433220180264543e-07, "logits/chosen": -3.631974697113037, "logits/rejected": -3.316451072692871, "logps/chosen": -205.1908721923828, "logps/rejected": -337.1497497558594, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": 0.005931135267019272, "rewards/margins": 1.8907032012939453, "rewards/rejected": -1.8847721815109253, "step": 2001 }, { "epoch": 0.23, "learning_rate": 2.3429708533302116e-07, "logits/chosen": -2.9462764263153076, "logits/rejected": -2.821960926055908, "logps/chosen": -189.9939727783203, "logps/rejected": -253.6620330810547, "loss": 0.351, "rewards/accuracies": 0.875, "rewards/chosen": -0.3489038944244385, "rewards/margins": 1.5633512735366821, "rewards/rejected": -1.912255048751831, "step": 2002 }, { "epoch": 0.23, "learning_rate": 2.342619688633969e-07, "logits/chosen": -3.9396228790283203, "logits/rejected": -3.848588466644287, "logps/chosen": -148.9018096923828, "logps/rejected": -193.3266143798828, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": -0.319566011428833, "rewards/margins": 2.0531301498413086, "rewards/rejected": -2.3726961612701416, "step": 2003 }, { "epoch": 0.23, "learning_rate": 2.342268523937727e-07, "logits/chosen": -3.3765323162078857, "logits/rejected": -3.3767380714416504, "logps/chosen": -241.10079956054688, "logps/rejected": -232.6448211669922, "loss": 0.7504, "rewards/accuracies": 0.75, "rewards/chosen": -0.6027411222457886, "rewards/margins": 1.359520673751831, "rewards/rejected": -1.9622617959976196, "step": 2004 }, { "epoch": 0.23, "learning_rate": 2.3419173592414842e-07, "logits/chosen": -3.2411155700683594, "logits/rejected": -3.378081798553467, "logps/chosen": -459.50311279296875, "logps/rejected": -670.8847045898438, "loss": 0.4194, "rewards/accuracies": 0.625, "rewards/chosen": -0.3092588484287262, "rewards/margins": 1.3548946380615234, "rewards/rejected": -1.6641534566879272, "step": 2005 }, { "epoch": 0.23, "learning_rate": 2.3415661945452417e-07, "logits/chosen": -3.3376681804656982, "logits/rejected": -3.168686866760254, "logps/chosen": -406.0707092285156, "logps/rejected": -239.297607421875, "loss": 0.3926, "rewards/accuracies": 0.875, "rewards/chosen": -0.4092297852039337, "rewards/margins": 1.8080594539642334, "rewards/rejected": -2.217289447784424, "step": 2006 }, { "epoch": 0.23, "learning_rate": 2.3412150298489992e-07, "logits/chosen": -3.073347330093384, "logits/rejected": -3.071197986602783, "logps/chosen": -182.3142852783203, "logps/rejected": -223.7637939453125, "loss": 0.2561, "rewards/accuracies": 1.0, "rewards/chosen": 0.025876428931951523, "rewards/margins": 1.7001011371612549, "rewards/rejected": -1.674224853515625, "step": 2007 }, { "epoch": 0.23, "learning_rate": 2.3408638651527565e-07, "logits/chosen": -2.645876884460449, "logits/rejected": -2.553802013397217, "logps/chosen": -143.4980010986328, "logps/rejected": -225.02066040039062, "loss": 0.551, "rewards/accuracies": 0.75, "rewards/chosen": -0.30724093317985535, "rewards/margins": 0.5976823568344116, "rewards/rejected": -0.9049233198165894, "step": 2008 }, { "epoch": 0.23, "learning_rate": 2.340512700456514e-07, "logits/chosen": -2.7979679107666016, "logits/rejected": -3.0682616233825684, "logps/chosen": -214.00424194335938, "logps/rejected": -242.20034790039062, "loss": 0.3809, "rewards/accuracies": 0.75, "rewards/chosen": 0.0684867799282074, "rewards/margins": 1.3817732334136963, "rewards/rejected": -1.313286542892456, "step": 2009 }, { "epoch": 0.23, "learning_rate": 2.3401615357602713e-07, "logits/chosen": -3.442659616470337, "logits/rejected": -3.7547972202301025, "logps/chosen": -143.4029083251953, "logps/rejected": -194.121337890625, "loss": 0.568, "rewards/accuracies": 0.75, "rewards/chosen": -0.04603178799152374, "rewards/margins": 1.5408352613449097, "rewards/rejected": -1.5868672132492065, "step": 2010 }, { "epoch": 0.23, "learning_rate": 2.339810371064029e-07, "logits/chosen": -3.143305778503418, "logits/rejected": -2.797666311264038, "logps/chosen": -206.4252166748047, "logps/rejected": -227.5640869140625, "loss": 0.4027, "rewards/accuracies": 0.875, "rewards/chosen": 0.028666503727436066, "rewards/margins": 0.896481454372406, "rewards/rejected": -0.8678149580955505, "step": 2011 }, { "epoch": 0.23, "learning_rate": 2.3394592063677864e-07, "logits/chosen": -2.2647078037261963, "logits/rejected": -2.648935317993164, "logps/chosen": -198.26852416992188, "logps/rejected": -214.53927612304688, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": 0.39364778995513916, "rewards/margins": 2.059122085571289, "rewards/rejected": -1.665474534034729, "step": 2012 }, { "epoch": 0.23, "learning_rate": 2.3391080416715437e-07, "logits/chosen": -3.081793785095215, "logits/rejected": -3.18727445602417, "logps/chosen": -383.7090148925781, "logps/rejected": -410.2918701171875, "loss": 0.5071, "rewards/accuracies": 0.75, "rewards/chosen": 0.17957527935504913, "rewards/margins": 1.3364853858947754, "rewards/rejected": -1.1569101810455322, "step": 2013 }, { "epoch": 0.23, "learning_rate": 2.3387568769753012e-07, "logits/chosen": -3.4525787830352783, "logits/rejected": -3.156954050064087, "logps/chosen": -919.3490600585938, "logps/rejected": -304.83258056640625, "loss": 0.6537, "rewards/accuracies": 0.625, "rewards/chosen": -0.19651490449905396, "rewards/margins": 0.9749191999435425, "rewards/rejected": -1.1714341640472412, "step": 2014 }, { "epoch": 0.23, "learning_rate": 2.338405712279059e-07, "logits/chosen": -3.2447586059570312, "logits/rejected": -2.891024112701416, "logps/chosen": -277.0435791015625, "logps/rejected": -337.3553771972656, "loss": 0.8479, "rewards/accuracies": 0.625, "rewards/chosen": -0.21248409152030945, "rewards/margins": 0.5197092890739441, "rewards/rejected": -0.7321934700012207, "step": 2015 }, { "epoch": 0.23, "learning_rate": 2.3380545475828163e-07, "logits/chosen": -2.3986520767211914, "logits/rejected": -2.9115982055664062, "logps/chosen": -225.97000122070312, "logps/rejected": -271.666259765625, "loss": 0.5806, "rewards/accuracies": 0.5, "rewards/chosen": -0.3381935954093933, "rewards/margins": 0.7461787462234497, "rewards/rejected": -1.0843722820281982, "step": 2016 }, { "epoch": 0.23, "learning_rate": 2.3377033828865738e-07, "logits/chosen": -2.841732978820801, "logits/rejected": -3.3018264770507812, "logps/chosen": -151.11618041992188, "logps/rejected": -166.3741455078125, "loss": 0.4516, "rewards/accuracies": 0.75, "rewards/chosen": -0.16383850574493408, "rewards/margins": 0.8887876272201538, "rewards/rejected": -1.052626132965088, "step": 2017 }, { "epoch": 0.23, "learning_rate": 2.337352218190331e-07, "logits/chosen": -3.245800495147705, "logits/rejected": -3.195681095123291, "logps/chosen": -184.68496704101562, "logps/rejected": -324.7975769042969, "loss": 0.2785, "rewards/accuracies": 0.875, "rewards/chosen": 0.24927213788032532, "rewards/margins": 2.601172924041748, "rewards/rejected": -2.351900577545166, "step": 2018 }, { "epoch": 0.23, "learning_rate": 2.3370010534940886e-07, "logits/chosen": -3.6032605171203613, "logits/rejected": -3.8894782066345215, "logps/chosen": -90.32426452636719, "logps/rejected": -189.17861938476562, "loss": 0.268, "rewards/accuracies": 0.875, "rewards/chosen": 0.05410033091902733, "rewards/margins": 2.307948589324951, "rewards/rejected": -2.253848075866699, "step": 2019 }, { "epoch": 0.23, "learning_rate": 2.3366498887978462e-07, "logits/chosen": -2.290137767791748, "logits/rejected": -2.274712085723877, "logps/chosen": -313.7291564941406, "logps/rejected": -258.10540771484375, "loss": 0.513, "rewards/accuracies": 0.5, "rewards/chosen": -0.5373225212097168, "rewards/margins": 0.8017616271972656, "rewards/rejected": -1.3390840291976929, "step": 2020 }, { "epoch": 0.23, "learning_rate": 2.3362987241016035e-07, "logits/chosen": -2.9940185546875, "logits/rejected": -3.140697479248047, "logps/chosen": -275.61041259765625, "logps/rejected": -196.7899169921875, "loss": 0.2742, "rewards/accuracies": 0.875, "rewards/chosen": 0.6242948174476624, "rewards/margins": 1.6437617540359497, "rewards/rejected": -1.0194669961929321, "step": 2021 }, { "epoch": 0.23, "learning_rate": 2.335947559405361e-07, "logits/chosen": -3.541834831237793, "logits/rejected": -3.439565658569336, "logps/chosen": -172.88687133789062, "logps/rejected": -255.30221557617188, "loss": 0.2693, "rewards/accuracies": 1.0, "rewards/chosen": -0.458914577960968, "rewards/margins": 2.003291130065918, "rewards/rejected": -2.462205648422241, "step": 2022 }, { "epoch": 0.23, "learning_rate": 2.3355963947091185e-07, "logits/chosen": -3.9574637413024902, "logits/rejected": -3.7782845497131348, "logps/chosen": -264.4451599121094, "logps/rejected": -276.3128967285156, "loss": 0.3568, "rewards/accuracies": 0.875, "rewards/chosen": -0.5034382343292236, "rewards/margins": 2.0167386531829834, "rewards/rejected": -2.520176887512207, "step": 2023 }, { "epoch": 0.23, "learning_rate": 2.3352452300128758e-07, "logits/chosen": -3.063586950302124, "logits/rejected": -3.0610733032226562, "logps/chosen": -268.8010559082031, "logps/rejected": -231.368408203125, "loss": 0.3525, "rewards/accuracies": 0.875, "rewards/chosen": -0.1789417415857315, "rewards/margins": 1.3715598583221436, "rewards/rejected": -1.550501823425293, "step": 2024 }, { "epoch": 0.23, "learning_rate": 2.3348940653166334e-07, "logits/chosen": -2.6767821311950684, "logits/rejected": -2.8032383918762207, "logps/chosen": -142.48199462890625, "logps/rejected": -156.19973754882812, "loss": 0.3928, "rewards/accuracies": 0.625, "rewards/chosen": -0.01817232370376587, "rewards/margins": 1.2058762311935425, "rewards/rejected": -1.2240486145019531, "step": 2025 }, { "epoch": 0.23, "learning_rate": 2.3345429006203906e-07, "logits/chosen": -3.4470295906066895, "logits/rejected": -3.3284735679626465, "logps/chosen": -245.08966064453125, "logps/rejected": -232.45571899414062, "loss": 0.653, "rewards/accuracies": 0.75, "rewards/chosen": -0.3664732575416565, "rewards/margins": 0.8882078528404236, "rewards/rejected": -1.25468111038208, "step": 2026 }, { "epoch": 0.23, "learning_rate": 2.3341917359241484e-07, "logits/chosen": -2.7545552253723145, "logits/rejected": -2.7538089752197266, "logps/chosen": -150.04014587402344, "logps/rejected": -214.3499755859375, "loss": 0.2637, "rewards/accuracies": 0.875, "rewards/chosen": 0.41995683312416077, "rewards/margins": 1.5865967273712158, "rewards/rejected": -1.1666399240493774, "step": 2027 }, { "epoch": 0.23, "learning_rate": 2.333840571227906e-07, "logits/chosen": -3.548743486404419, "logits/rejected": -3.684885263442993, "logps/chosen": -252.48455810546875, "logps/rejected": -194.57154846191406, "loss": 0.2726, "rewards/accuracies": 0.875, "rewards/chosen": -0.001837063580751419, "rewards/margins": 1.6874967813491821, "rewards/rejected": -1.6893337965011597, "step": 2028 }, { "epoch": 0.23, "learning_rate": 2.3334894065316632e-07, "logits/chosen": -3.33207631111145, "logits/rejected": -3.281633138656616, "logps/chosen": -482.3106384277344, "logps/rejected": -255.0568084716797, "loss": 0.2113, "rewards/accuracies": 0.875, "rewards/chosen": 0.5264302492141724, "rewards/margins": 2.3673295974731445, "rewards/rejected": -1.8408994674682617, "step": 2029 }, { "epoch": 0.23, "learning_rate": 2.3331382418354208e-07, "logits/chosen": -2.669374942779541, "logits/rejected": -2.921800374984741, "logps/chosen": -364.39935302734375, "logps/rejected": -233.28269958496094, "loss": 0.6693, "rewards/accuracies": 0.5, "rewards/chosen": -0.5869595408439636, "rewards/margins": 0.3963136076927185, "rewards/rejected": -0.9832731485366821, "step": 2030 }, { "epoch": 0.23, "learning_rate": 2.3327870771391783e-07, "logits/chosen": -3.4732565879821777, "logits/rejected": -3.605494260787964, "logps/chosen": -198.308349609375, "logps/rejected": -190.59176635742188, "loss": 0.274, "rewards/accuracies": 1.0, "rewards/chosen": 0.564396858215332, "rewards/margins": 1.867499589920044, "rewards/rejected": -1.3031026124954224, "step": 2031 }, { "epoch": 0.23, "learning_rate": 2.3324359124429356e-07, "logits/chosen": -3.0682919025421143, "logits/rejected": -3.3911755084991455, "logps/chosen": -194.8433837890625, "logps/rejected": -226.01980590820312, "loss": 0.4594, "rewards/accuracies": 0.75, "rewards/chosen": 0.09444911032915115, "rewards/margins": 1.8091578483581543, "rewards/rejected": -1.714708685874939, "step": 2032 }, { "epoch": 0.23, "learning_rate": 2.332084747746693e-07, "logits/chosen": -3.734302520751953, "logits/rejected": -3.6981258392333984, "logps/chosen": -263.3233337402344, "logps/rejected": -304.0080261230469, "loss": 0.2715, "rewards/accuracies": 0.875, "rewards/chosen": -0.4291212558746338, "rewards/margins": 2.5984606742858887, "rewards/rejected": -3.0275819301605225, "step": 2033 }, { "epoch": 0.23, "learning_rate": 2.3317335830504504e-07, "logits/chosen": -2.865103244781494, "logits/rejected": -2.8553848266601562, "logps/chosen": -304.69525146484375, "logps/rejected": -270.1980895996094, "loss": 0.4169, "rewards/accuracies": 0.875, "rewards/chosen": 0.3193957507610321, "rewards/margins": 1.4623113870620728, "rewards/rejected": -1.1429156064987183, "step": 2034 }, { "epoch": 0.23, "learning_rate": 2.331382418354208e-07, "logits/chosen": -3.4932947158813477, "logits/rejected": -3.5716090202331543, "logps/chosen": -97.14220428466797, "logps/rejected": -182.97845458984375, "loss": 0.2682, "rewards/accuracies": 0.875, "rewards/chosen": -0.4542883634567261, "rewards/margins": 1.6249351501464844, "rewards/rejected": -2.0792236328125, "step": 2035 }, { "epoch": 0.23, "learning_rate": 2.3310312536579655e-07, "logits/chosen": -2.827333450317383, "logits/rejected": -3.074057102203369, "logps/chosen": -187.10299682617188, "logps/rejected": -214.9417724609375, "loss": 0.3275, "rewards/accuracies": 0.875, "rewards/chosen": 0.43410050868988037, "rewards/margins": 2.3590221405029297, "rewards/rejected": -1.9249216318130493, "step": 2036 }, { "epoch": 0.23, "learning_rate": 2.3306800889617228e-07, "logits/chosen": -3.311232566833496, "logits/rejected": -2.9895005226135254, "logps/chosen": -203.78494262695312, "logps/rejected": -176.9379119873047, "loss": 0.3698, "rewards/accuracies": 0.875, "rewards/chosen": 0.2735413610935211, "rewards/margins": 1.570022702217102, "rewards/rejected": -1.2964814901351929, "step": 2037 }, { "epoch": 0.23, "learning_rate": 2.3303289242654806e-07, "logits/chosen": -3.495087146759033, "logits/rejected": -3.3478736877441406, "logps/chosen": -236.81210327148438, "logps/rejected": -227.91860961914062, "loss": 0.448, "rewards/accuracies": 0.625, "rewards/chosen": -0.6140186190605164, "rewards/margins": 1.039901614189148, "rewards/rejected": -1.653920292854309, "step": 2038 }, { "epoch": 0.24, "learning_rate": 2.329977759569238e-07, "logits/chosen": -3.367856025695801, "logits/rejected": -3.5354931354522705, "logps/chosen": -98.99674224853516, "logps/rejected": -166.5740966796875, "loss": 0.456, "rewards/accuracies": 0.625, "rewards/chosen": -0.31941890716552734, "rewards/margins": 1.4417237043380737, "rewards/rejected": -1.761142611503601, "step": 2039 }, { "epoch": 0.24, "learning_rate": 2.3296265948729954e-07, "logits/chosen": -2.9856796264648438, "logits/rejected": -3.249156951904297, "logps/chosen": -236.30120849609375, "logps/rejected": -281.39324951171875, "loss": 0.301, "rewards/accuracies": 0.875, "rewards/chosen": -0.11615180224180222, "rewards/margins": 1.4050902128219604, "rewards/rejected": -1.5212421417236328, "step": 2040 }, { "epoch": 0.24, "learning_rate": 2.329275430176753e-07, "logits/chosen": -3.1886255741119385, "logits/rejected": -3.8579916954040527, "logps/chosen": -218.33627319335938, "logps/rejected": -339.0235595703125, "loss": 0.2139, "rewards/accuracies": 1.0, "rewards/chosen": 0.2469361275434494, "rewards/margins": 2.5802955627441406, "rewards/rejected": -2.333359718322754, "step": 2041 }, { "epoch": 0.24, "learning_rate": 2.3289242654805102e-07, "logits/chosen": -3.01922869682312, "logits/rejected": -2.592362403869629, "logps/chosen": -371.1042785644531, "logps/rejected": -258.16058349609375, "loss": 0.6831, "rewards/accuracies": 0.625, "rewards/chosen": -0.14682637155056, "rewards/margins": 0.264029324054718, "rewards/rejected": -0.4108557105064392, "step": 2042 }, { "epoch": 0.24, "learning_rate": 2.3285731007842677e-07, "logits/chosen": -3.91856050491333, "logits/rejected": -3.606844663619995, "logps/chosen": -339.1500549316406, "logps/rejected": -207.0172119140625, "loss": 0.6975, "rewards/accuracies": 0.625, "rewards/chosen": -0.4894591271877289, "rewards/margins": 0.12524759769439697, "rewards/rejected": -0.6147066950798035, "step": 2043 }, { "epoch": 0.24, "learning_rate": 2.3282219360880253e-07, "logits/chosen": -3.613152027130127, "logits/rejected": -3.046384334564209, "logps/chosen": -275.8511047363281, "logps/rejected": -142.7661590576172, "loss": 0.3015, "rewards/accuracies": 0.75, "rewards/chosen": 0.10897894948720932, "rewards/margins": 1.6519333124160767, "rewards/rejected": -1.5429542064666748, "step": 2044 }, { "epoch": 0.24, "learning_rate": 2.3278707713917825e-07, "logits/chosen": -3.560540199279785, "logits/rejected": -3.3008222579956055, "logps/chosen": -170.01898193359375, "logps/rejected": -198.36279296875, "loss": 0.2842, "rewards/accuracies": 0.875, "rewards/chosen": 0.1061142310500145, "rewards/margins": 1.7505090236663818, "rewards/rejected": -1.6443949937820435, "step": 2045 }, { "epoch": 0.24, "learning_rate": 2.32751960669554e-07, "logits/chosen": -2.879520893096924, "logits/rejected": -2.573899269104004, "logps/chosen": -406.53265380859375, "logps/rejected": -242.12899780273438, "loss": 0.8494, "rewards/accuracies": 0.75, "rewards/chosen": -0.4708259701728821, "rewards/margins": 0.6300718188285828, "rewards/rejected": -1.1008977890014648, "step": 2046 }, { "epoch": 0.24, "learning_rate": 2.3271684419992973e-07, "logits/chosen": -3.217991352081299, "logits/rejected": -2.8754897117614746, "logps/chosen": -263.68121337890625, "logps/rejected": -271.5013427734375, "loss": 0.4251, "rewards/accuracies": 0.75, "rewards/chosen": -0.06216999888420105, "rewards/margins": 0.8423784971237183, "rewards/rejected": -0.9045485258102417, "step": 2047 }, { "epoch": 0.24, "learning_rate": 2.326817277303055e-07, "logits/chosen": -2.6422576904296875, "logits/rejected": -2.5133559703826904, "logps/chosen": -368.7999267578125, "logps/rejected": -332.3048400878906, "loss": 0.7202, "rewards/accuracies": 0.625, "rewards/chosen": -0.14642934501171112, "rewards/margins": 0.6152974367141724, "rewards/rejected": -0.7617268562316895, "step": 2048 }, { "epoch": 0.24, "learning_rate": 2.3264661126068127e-07, "logits/chosen": -3.1302313804626465, "logits/rejected": -2.920776844024658, "logps/chosen": -312.0892333984375, "logps/rejected": -202.4169464111328, "loss": 0.294, "rewards/accuracies": 0.875, "rewards/chosen": 0.3656645715236664, "rewards/margins": 1.546316385269165, "rewards/rejected": -1.1806517839431763, "step": 2049 }, { "epoch": 0.24, "learning_rate": 2.32611494791057e-07, "logits/chosen": -3.1597986221313477, "logits/rejected": -3.3330230712890625, "logps/chosen": -223.2503662109375, "logps/rejected": -358.99755859375, "loss": 0.3259, "rewards/accuracies": 0.875, "rewards/chosen": 0.21997302770614624, "rewards/margins": 2.1808881759643555, "rewards/rejected": -1.960915207862854, "step": 2050 }, { "epoch": 0.24, "learning_rate": 2.3257637832143275e-07, "logits/chosen": -3.1756839752197266, "logits/rejected": -2.7677314281463623, "logps/chosen": -254.58428955078125, "logps/rejected": -170.6003875732422, "loss": 0.335, "rewards/accuracies": 0.75, "rewards/chosen": 0.13010364770889282, "rewards/margins": 1.7006912231445312, "rewards/rejected": -1.570587396621704, "step": 2051 }, { "epoch": 0.24, "learning_rate": 2.325412618518085e-07, "logits/chosen": -3.3310437202453613, "logits/rejected": -3.3536481857299805, "logps/chosen": -256.8912353515625, "logps/rejected": -275.40472412109375, "loss": 0.3766, "rewards/accuracies": 0.875, "rewards/chosen": 0.12958364188671112, "rewards/margins": 1.7343131303787231, "rewards/rejected": -1.604729413986206, "step": 2052 }, { "epoch": 0.24, "learning_rate": 2.3250614538218423e-07, "logits/chosen": -3.2945964336395264, "logits/rejected": -3.4332611560821533, "logps/chosen": -194.5066375732422, "logps/rejected": -329.435791015625, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": 0.7774198055267334, "rewards/margins": 4.159289360046387, "rewards/rejected": -3.3818695545196533, "step": 2053 }, { "epoch": 0.24, "learning_rate": 2.3247102891255999e-07, "logits/chosen": -2.982991933822632, "logits/rejected": -2.9232401847839355, "logps/chosen": -213.48809814453125, "logps/rejected": -351.4576416015625, "loss": 0.3627, "rewards/accuracies": 0.75, "rewards/chosen": 0.13747844099998474, "rewards/margins": 3.1277759075164795, "rewards/rejected": -2.990297794342041, "step": 2054 }, { "epoch": 0.24, "learning_rate": 2.324359124429357e-07, "logits/chosen": -2.8734798431396484, "logits/rejected": -2.8921728134155273, "logps/chosen": -322.9697570800781, "logps/rejected": -226.138671875, "loss": 0.5787, "rewards/accuracies": 0.75, "rewards/chosen": 0.2873033285140991, "rewards/margins": 1.1255102157592773, "rewards/rejected": -0.8382070064544678, "step": 2055 }, { "epoch": 0.24, "learning_rate": 2.3240079597331147e-07, "logits/chosen": -2.7112746238708496, "logits/rejected": -2.5812742710113525, "logps/chosen": -196.1005859375, "logps/rejected": -271.1686706542969, "loss": 0.3086, "rewards/accuracies": 0.75, "rewards/chosen": 0.15070503950119019, "rewards/margins": 2.9618914127349854, "rewards/rejected": -2.8111863136291504, "step": 2056 }, { "epoch": 0.24, "learning_rate": 2.3236567950368722e-07, "logits/chosen": -3.264662504196167, "logits/rejected": -3.1798624992370605, "logps/chosen": -348.24273681640625, "logps/rejected": -242.38970947265625, "loss": 0.4781, "rewards/accuracies": 0.875, "rewards/chosen": 0.515073835849762, "rewards/margins": 1.0141679048538208, "rewards/rejected": -0.4990941286087036, "step": 2057 }, { "epoch": 0.24, "learning_rate": 2.3233056303406295e-07, "logits/chosen": -3.362121820449829, "logits/rejected": -3.3651020526885986, "logps/chosen": -230.2619171142578, "logps/rejected": -254.53724670410156, "loss": 0.5208, "rewards/accuracies": 0.875, "rewards/chosen": 0.09543068706989288, "rewards/margins": 1.0240224599838257, "rewards/rejected": -0.9285917282104492, "step": 2058 }, { "epoch": 0.24, "learning_rate": 2.322954465644387e-07, "logits/chosen": -2.944413185119629, "logits/rejected": -3.228311061859131, "logps/chosen": -193.652099609375, "logps/rejected": -226.46304321289062, "loss": 0.6407, "rewards/accuracies": 0.625, "rewards/chosen": 0.17432984709739685, "rewards/margins": 0.6353402733802795, "rewards/rejected": -0.4610104560852051, "step": 2059 }, { "epoch": 0.24, "learning_rate": 2.3226033009481448e-07, "logits/chosen": -3.9251208305358887, "logits/rejected": -3.854796886444092, "logps/chosen": -204.34645080566406, "logps/rejected": -273.2313232421875, "loss": 0.313, "rewards/accuracies": 0.875, "rewards/chosen": -0.7787759304046631, "rewards/margins": 1.3681988716125488, "rewards/rejected": -2.146974563598633, "step": 2060 }, { "epoch": 0.24, "learning_rate": 2.322252136251902e-07, "logits/chosen": -2.9845826625823975, "logits/rejected": -3.1362674236297607, "logps/chosen": -179.7996826171875, "logps/rejected": -229.71188354492188, "loss": 0.4382, "rewards/accuracies": 0.875, "rewards/chosen": -0.14942818880081177, "rewards/margins": 0.7847808003425598, "rewards/rejected": -0.9342089295387268, "step": 2061 }, { "epoch": 0.24, "learning_rate": 2.3219009715556596e-07, "logits/chosen": -3.1472270488739014, "logits/rejected": -3.2027831077575684, "logps/chosen": -250.66317749023438, "logps/rejected": -249.25238037109375, "loss": 0.5574, "rewards/accuracies": 0.625, "rewards/chosen": 0.3986305892467499, "rewards/margins": 0.97455233335495, "rewards/rejected": -0.5759217143058777, "step": 2062 }, { "epoch": 0.24, "learning_rate": 2.321549806859417e-07, "logits/chosen": -3.1092135906219482, "logits/rejected": -2.8321921825408936, "logps/chosen": -374.6018371582031, "logps/rejected": -338.23846435546875, "loss": 0.2209, "rewards/accuracies": 1.0, "rewards/chosen": 0.2629488706588745, "rewards/margins": 1.969550371170044, "rewards/rejected": -1.706601619720459, "step": 2063 }, { "epoch": 0.24, "learning_rate": 2.3211986421631744e-07, "logits/chosen": -2.9038593769073486, "logits/rejected": -3.2201952934265137, "logps/chosen": -224.44158935546875, "logps/rejected": -218.9890594482422, "loss": 0.6334, "rewards/accuracies": 0.75, "rewards/chosen": -0.05779045820236206, "rewards/margins": 0.5659310817718506, "rewards/rejected": -0.6237214803695679, "step": 2064 }, { "epoch": 0.24, "learning_rate": 2.320847477466932e-07, "logits/chosen": -2.447350025177002, "logits/rejected": -2.329530715942383, "logps/chosen": -241.9302520751953, "logps/rejected": -236.34881591796875, "loss": 0.6439, "rewards/accuracies": 0.75, "rewards/chosen": -0.045799076557159424, "rewards/margins": 0.5019633769989014, "rewards/rejected": -0.5477623343467712, "step": 2065 }, { "epoch": 0.24, "learning_rate": 2.3204963127706893e-07, "logits/chosen": -3.6228723526000977, "logits/rejected": -3.3741965293884277, "logps/chosen": -271.9809265136719, "logps/rejected": -237.58929443359375, "loss": 0.2628, "rewards/accuracies": 0.875, "rewards/chosen": 0.09044289588928223, "rewards/margins": 2.3310208320617676, "rewards/rejected": -2.2405776977539062, "step": 2066 }, { "epoch": 0.24, "learning_rate": 2.3201451480744468e-07, "logits/chosen": -3.1667304039001465, "logits/rejected": -3.262972354888916, "logps/chosen": -123.07827758789062, "logps/rejected": -151.53941345214844, "loss": 0.2892, "rewards/accuracies": 0.875, "rewards/chosen": 0.3378388583660126, "rewards/margins": 1.7564888000488281, "rewards/rejected": -1.4186499118804932, "step": 2067 }, { "epoch": 0.24, "learning_rate": 2.3197939833782043e-07, "logits/chosen": -3.7089505195617676, "logits/rejected": -3.5454938411712646, "logps/chosen": -137.44082641601562, "logps/rejected": -157.4412841796875, "loss": 0.6137, "rewards/accuracies": 0.625, "rewards/chosen": -0.41431868076324463, "rewards/margins": 0.894723117351532, "rewards/rejected": -1.3090417385101318, "step": 2068 }, { "epoch": 0.24, "learning_rate": 2.3194428186819616e-07, "logits/chosen": -3.1241326332092285, "logits/rejected": -2.979635238647461, "logps/chosen": -340.079833984375, "logps/rejected": -253.205810546875, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": 0.24286359548568726, "rewards/margins": 1.4917945861816406, "rewards/rejected": -1.2489310503005981, "step": 2069 }, { "epoch": 0.24, "learning_rate": 2.3190916539857191e-07, "logits/chosen": -3.3570804595947266, "logits/rejected": -3.229564905166626, "logps/chosen": -162.0731201171875, "logps/rejected": -130.47509765625, "loss": 0.3454, "rewards/accuracies": 0.875, "rewards/chosen": 0.2674616575241089, "rewards/margins": 1.1245052814483643, "rewards/rejected": -0.8570435047149658, "step": 2070 }, { "epoch": 0.24, "learning_rate": 2.3187404892894764e-07, "logits/chosen": -2.6870970726013184, "logits/rejected": -2.5410425662994385, "logps/chosen": -357.74017333984375, "logps/rejected": -200.80682373046875, "loss": 0.7472, "rewards/accuracies": 0.625, "rewards/chosen": -0.6361706256866455, "rewards/margins": 0.39179790019989014, "rewards/rejected": -1.0279685258865356, "step": 2071 }, { "epoch": 0.24, "learning_rate": 2.3183893245932342e-07, "logits/chosen": -3.490499258041382, "logits/rejected": -2.9236066341400146, "logps/chosen": -211.46511840820312, "logps/rejected": -208.77003479003906, "loss": 0.5209, "rewards/accuracies": 0.75, "rewards/chosen": -0.09810754656791687, "rewards/margins": 0.593243420124054, "rewards/rejected": -0.691351056098938, "step": 2072 }, { "epoch": 0.24, "learning_rate": 2.3180381598969918e-07, "logits/chosen": -2.4475386142730713, "logits/rejected": -2.4813172817230225, "logps/chosen": -239.48947143554688, "logps/rejected": -199.1544189453125, "loss": 0.4764, "rewards/accuracies": 0.625, "rewards/chosen": -0.42903584241867065, "rewards/margins": 1.0240083932876587, "rewards/rejected": -1.4530441761016846, "step": 2073 }, { "epoch": 0.24, "learning_rate": 2.317686995200749e-07, "logits/chosen": -4.11273717880249, "logits/rejected": -3.5686588287353516, "logps/chosen": -444.4793701171875, "logps/rejected": -270.8744201660156, "loss": 0.3164, "rewards/accuracies": 0.75, "rewards/chosen": 0.11237752437591553, "rewards/margins": 1.9849063158035278, "rewards/rejected": -1.8725286722183228, "step": 2074 }, { "epoch": 0.24, "learning_rate": 2.3173358305045066e-07, "logits/chosen": -3.6211235523223877, "logits/rejected": -3.3090083599090576, "logps/chosen": -255.5709228515625, "logps/rejected": -201.60833740234375, "loss": 0.441, "rewards/accuracies": 0.875, "rewards/chosen": -0.05448861047625542, "rewards/margins": 0.8456190228462219, "rewards/rejected": -0.9001076221466064, "step": 2075 }, { "epoch": 0.24, "learning_rate": 2.316984665808264e-07, "logits/chosen": -2.211095094680786, "logits/rejected": -2.259305953979492, "logps/chosen": -380.33465576171875, "logps/rejected": -292.9688720703125, "loss": 0.3006, "rewards/accuracies": 0.75, "rewards/chosen": 0.6065548658370972, "rewards/margins": 2.546454906463623, "rewards/rejected": -1.9399000406265259, "step": 2076 }, { "epoch": 0.24, "learning_rate": 2.3166335011120214e-07, "logits/chosen": -3.1371800899505615, "logits/rejected": -3.3831777572631836, "logps/chosen": -372.75347900390625, "logps/rejected": -202.69241333007812, "loss": 0.3879, "rewards/accuracies": 0.75, "rewards/chosen": 0.14730143547058105, "rewards/margins": 1.5739052295684814, "rewards/rejected": -1.42660391330719, "step": 2077 }, { "epoch": 0.24, "learning_rate": 2.316282336415779e-07, "logits/chosen": -3.3189592361450195, "logits/rejected": -3.094216823577881, "logps/chosen": -293.01885986328125, "logps/rejected": -193.8881072998047, "loss": 0.3246, "rewards/accuracies": 0.875, "rewards/chosen": 0.027197472751140594, "rewards/margins": 1.5831927061080933, "rewards/rejected": -1.555995225906372, "step": 2078 }, { "epoch": 0.24, "learning_rate": 2.3159311717195362e-07, "logits/chosen": -2.4798660278320312, "logits/rejected": -2.34287166595459, "logps/chosen": -218.00634765625, "logps/rejected": -256.8708801269531, "loss": 0.2574, "rewards/accuracies": 1.0, "rewards/chosen": 0.16102617979049683, "rewards/margins": 1.4642114639282227, "rewards/rejected": -1.303185224533081, "step": 2079 }, { "epoch": 0.24, "learning_rate": 2.3155800070232937e-07, "logits/chosen": -2.9939517974853516, "logits/rejected": -3.3702993392944336, "logps/chosen": -301.1270751953125, "logps/rejected": -247.39715576171875, "loss": 0.1962, "rewards/accuracies": 1.0, "rewards/chosen": 0.16677403450012207, "rewards/margins": 3.0479025840759277, "rewards/rejected": -2.8811285495758057, "step": 2080 }, { "epoch": 0.24, "learning_rate": 2.3152288423270513e-07, "logits/chosen": -3.201848030090332, "logits/rejected": -3.272211790084839, "logps/chosen": -164.5875244140625, "logps/rejected": -234.74484252929688, "loss": 0.4146, "rewards/accuracies": 0.875, "rewards/chosen": -0.17439353466033936, "rewards/margins": 1.2795435190200806, "rewards/rejected": -1.45393705368042, "step": 2081 }, { "epoch": 0.24, "learning_rate": 2.3148776776308085e-07, "logits/chosen": -3.4412922859191895, "logits/rejected": -3.293165922164917, "logps/chosen": -274.3909912109375, "logps/rejected": -241.2705841064453, "loss": 0.3418, "rewards/accuracies": 0.75, "rewards/chosen": 0.5082772970199585, "rewards/margins": 2.3811819553375244, "rewards/rejected": -1.8729045391082764, "step": 2082 }, { "epoch": 0.24, "learning_rate": 2.3145265129345664e-07, "logits/chosen": -2.939328670501709, "logits/rejected": -2.76759672164917, "logps/chosen": -234.68016052246094, "logps/rejected": -265.6005859375, "loss": 0.7198, "rewards/accuracies": 0.375, "rewards/chosen": -0.1831745207309723, "rewards/margins": 0.11841154098510742, "rewards/rejected": -0.3015860915184021, "step": 2083 }, { "epoch": 0.24, "learning_rate": 2.314175348238324e-07, "logits/chosen": -3.9192757606506348, "logits/rejected": -3.9525022506713867, "logps/chosen": -220.15306091308594, "logps/rejected": -256.71136474609375, "loss": 0.6264, "rewards/accuracies": 0.75, "rewards/chosen": -0.3700861930847168, "rewards/margins": 1.2942720651626587, "rewards/rejected": -1.6643580198287964, "step": 2084 }, { "epoch": 0.24, "learning_rate": 2.3138241835420812e-07, "logits/chosen": -2.8299174308776855, "logits/rejected": -3.205439567565918, "logps/chosen": -241.68505859375, "logps/rejected": -176.82130432128906, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": 0.5955206155776978, "rewards/margins": 1.596709966659546, "rewards/rejected": -1.0011893510818481, "step": 2085 }, { "epoch": 0.24, "learning_rate": 2.3134730188458387e-07, "logits/chosen": -3.220149040222168, "logits/rejected": -3.125932216644287, "logps/chosen": -458.686767578125, "logps/rejected": -328.49542236328125, "loss": 0.412, "rewards/accuracies": 0.625, "rewards/chosen": 0.08934766054153442, "rewards/margins": 1.6326868534088135, "rewards/rejected": -1.5433392524719238, "step": 2086 }, { "epoch": 0.24, "learning_rate": 2.313121854149596e-07, "logits/chosen": -2.034285306930542, "logits/rejected": -2.003303050994873, "logps/chosen": -530.0022583007812, "logps/rejected": -336.278564453125, "loss": 0.3998, "rewards/accuracies": 0.875, "rewards/chosen": 0.5011769533157349, "rewards/margins": 1.1516534090042114, "rewards/rejected": -0.650476336479187, "step": 2087 }, { "epoch": 0.24, "learning_rate": 2.3127706894533535e-07, "logits/chosen": -2.691768169403076, "logits/rejected": -2.978412628173828, "logps/chosen": -214.77301025390625, "logps/rejected": -369.90252685546875, "loss": 0.2466, "rewards/accuracies": 1.0, "rewards/chosen": 0.3249848484992981, "rewards/margins": 2.362424850463867, "rewards/rejected": -2.0374398231506348, "step": 2088 }, { "epoch": 0.24, "learning_rate": 2.312419524757111e-07, "logits/chosen": -3.6816093921661377, "logits/rejected": -3.253176689147949, "logps/chosen": -359.3407897949219, "logps/rejected": -330.1440124511719, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": 0.015503372997045517, "rewards/margins": 3.212719440460205, "rewards/rejected": -3.197216033935547, "step": 2089 }, { "epoch": 0.24, "learning_rate": 2.3120683600608683e-07, "logits/chosen": -3.1668877601623535, "logits/rejected": -3.0535621643066406, "logps/chosen": -400.6658020019531, "logps/rejected": -362.7208251953125, "loss": 0.6347, "rewards/accuracies": 0.5, "rewards/chosen": -0.6740178465843201, "rewards/margins": 0.5710243582725525, "rewards/rejected": -1.245042085647583, "step": 2090 }, { "epoch": 0.24, "learning_rate": 2.3117171953646259e-07, "logits/chosen": -2.8681182861328125, "logits/rejected": -2.6557397842407227, "logps/chosen": -106.12987518310547, "logps/rejected": -183.69265747070312, "loss": 0.5506, "rewards/accuracies": 0.75, "rewards/chosen": 0.2046343982219696, "rewards/margins": 1.1567753553390503, "rewards/rejected": -0.9521410465240479, "step": 2091 }, { "epoch": 0.24, "learning_rate": 2.3113660306683831e-07, "logits/chosen": -3.3728270530700684, "logits/rejected": -3.6698052883148193, "logps/chosen": -379.8182678222656, "logps/rejected": -254.52731323242188, "loss": 0.3957, "rewards/accuracies": 0.75, "rewards/chosen": -0.03467864543199539, "rewards/margins": 1.7818523645401, "rewards/rejected": -1.8165309429168701, "step": 2092 }, { "epoch": 0.24, "learning_rate": 2.3110148659721407e-07, "logits/chosen": -2.8968770503997803, "logits/rejected": -2.829472303390503, "logps/chosen": -247.21621704101562, "logps/rejected": -295.7564697265625, "loss": 0.5919, "rewards/accuracies": 0.75, "rewards/chosen": 0.14039793610572815, "rewards/margins": 0.5136997699737549, "rewards/rejected": -0.3733018934726715, "step": 2093 }, { "epoch": 0.24, "learning_rate": 2.3106637012758985e-07, "logits/chosen": -3.104576587677002, "logits/rejected": -3.4068703651428223, "logps/chosen": -176.1011199951172, "logps/rejected": -160.91513061523438, "loss": 0.5162, "rewards/accuracies": 0.75, "rewards/chosen": -0.2899596393108368, "rewards/margins": 0.9834280014038086, "rewards/rejected": -1.2733876705169678, "step": 2094 }, { "epoch": 0.24, "learning_rate": 2.3103125365796558e-07, "logits/chosen": -3.8687262535095215, "logits/rejected": -3.768993854522705, "logps/chosen": -199.6669921875, "logps/rejected": -169.57044982910156, "loss": 0.4006, "rewards/accuracies": 0.75, "rewards/chosen": 0.3814202845096588, "rewards/margins": 1.068402886390686, "rewards/rejected": -0.6869826316833496, "step": 2095 }, { "epoch": 0.24, "learning_rate": 2.3099613718834133e-07, "logits/chosen": -2.8024728298187256, "logits/rejected": -2.988694906234741, "logps/chosen": -268.6394958496094, "logps/rejected": -374.6436462402344, "loss": 0.8894, "rewards/accuracies": 0.75, "rewards/chosen": -0.8080625534057617, "rewards/margins": 0.7284382581710815, "rewards/rejected": -1.5365009307861328, "step": 2096 }, { "epoch": 0.24, "learning_rate": 2.3096102071871708e-07, "logits/chosen": -2.6128599643707275, "logits/rejected": -2.492452383041382, "logps/chosen": -300.01947021484375, "logps/rejected": -275.435791015625, "loss": 0.4176, "rewards/accuracies": 0.875, "rewards/chosen": 0.17781177163124084, "rewards/margins": 1.2151402235031128, "rewards/rejected": -1.0373283624649048, "step": 2097 }, { "epoch": 0.24, "learning_rate": 2.309259042490928e-07, "logits/chosen": -3.0148143768310547, "logits/rejected": -3.1258163452148438, "logps/chosen": -164.94236755371094, "logps/rejected": -166.98150634765625, "loss": 0.5652, "rewards/accuracies": 0.875, "rewards/chosen": 0.29530441761016846, "rewards/margins": 0.7061986923217773, "rewards/rejected": -0.4108943045139313, "step": 2098 }, { "epoch": 0.24, "learning_rate": 2.3089078777946856e-07, "logits/chosen": -3.3342514038085938, "logits/rejected": -3.188351631164551, "logps/chosen": -161.34930419921875, "logps/rejected": -229.7317657470703, "loss": 0.1732, "rewards/accuracies": 1.0, "rewards/chosen": 0.24056200683116913, "rewards/margins": 2.5383083820343018, "rewards/rejected": -2.297746419906616, "step": 2099 }, { "epoch": 0.24, "learning_rate": 2.308556713098443e-07, "logits/chosen": -2.6050167083740234, "logits/rejected": -2.6193628311157227, "logps/chosen": -354.6103515625, "logps/rejected": -367.6568298339844, "loss": 0.5786, "rewards/accuracies": 0.75, "rewards/chosen": 0.20415526628494263, "rewards/margins": 0.8980470895767212, "rewards/rejected": -0.6938918828964233, "step": 2100 }, { "epoch": 0.24, "learning_rate": 2.3082055484022005e-07, "logits/chosen": -2.9589338302612305, "logits/rejected": -2.891653537750244, "logps/chosen": -156.6099853515625, "logps/rejected": -174.7583770751953, "loss": 0.5957, "rewards/accuracies": 0.5, "rewards/chosen": -0.2888055145740509, "rewards/margins": 0.4744884669780731, "rewards/rejected": -0.7632939219474792, "step": 2101 }, { "epoch": 0.24, "learning_rate": 2.307854383705958e-07, "logits/chosen": -2.8973965644836426, "logits/rejected": -2.8866379261016846, "logps/chosen": -188.91928100585938, "logps/rejected": -225.80870056152344, "loss": 0.4731, "rewards/accuracies": 0.75, "rewards/chosen": -0.06485879421234131, "rewards/margins": 1.3465664386749268, "rewards/rejected": -1.411425232887268, "step": 2102 }, { "epoch": 0.24, "learning_rate": 2.3075032190097153e-07, "logits/chosen": -3.013587236404419, "logits/rejected": -2.985133171081543, "logps/chosen": -226.9334259033203, "logps/rejected": -255.89234924316406, "loss": 0.263, "rewards/accuracies": 0.875, "rewards/chosen": -0.1455492079257965, "rewards/margins": 2.1877567768096924, "rewards/rejected": -2.333305835723877, "step": 2103 }, { "epoch": 0.24, "learning_rate": 2.3071520543134728e-07, "logits/chosen": -3.0888326168060303, "logits/rejected": -3.1155600547790527, "logps/chosen": -211.76461791992188, "logps/rejected": -248.92959594726562, "loss": 0.3834, "rewards/accuracies": 0.75, "rewards/chosen": 0.30154404044151306, "rewards/margins": 1.7661001682281494, "rewards/rejected": -1.4645562171936035, "step": 2104 }, { "epoch": 0.24, "learning_rate": 2.3068008896172306e-07, "logits/chosen": -3.4710254669189453, "logits/rejected": -3.2883238792419434, "logps/chosen": -241.4379425048828, "logps/rejected": -137.45843505859375, "loss": 0.5681, "rewards/accuracies": 0.75, "rewards/chosen": -0.36827781796455383, "rewards/margins": 1.1108441352844238, "rewards/rejected": -1.4791220426559448, "step": 2105 }, { "epoch": 0.24, "learning_rate": 2.306449724920988e-07, "logits/chosen": -3.1294000148773193, "logits/rejected": -3.027726650238037, "logps/chosen": -258.2811279296875, "logps/rejected": -280.6197204589844, "loss": 0.6973, "rewards/accuracies": 0.625, "rewards/chosen": -0.2621428072452545, "rewards/margins": 0.6313274502754211, "rewards/rejected": -0.893470287322998, "step": 2106 }, { "epoch": 0.24, "learning_rate": 2.3060985602247454e-07, "logits/chosen": -3.519259452819824, "logits/rejected": -3.564070701599121, "logps/chosen": -368.4876403808594, "logps/rejected": -262.3271789550781, "loss": 0.3158, "rewards/accuracies": 1.0, "rewards/chosen": 0.15722541511058807, "rewards/margins": 1.3306314945220947, "rewards/rejected": -1.1734061241149902, "step": 2107 }, { "epoch": 0.24, "learning_rate": 2.3057473955285027e-07, "logits/chosen": -2.4241862297058105, "logits/rejected": -2.557363510131836, "logps/chosen": -350.5266418457031, "logps/rejected": -244.41880798339844, "loss": 0.7731, "rewards/accuracies": 0.625, "rewards/chosen": -0.1726996898651123, "rewards/margins": 0.4294752776622772, "rewards/rejected": -0.6021749377250671, "step": 2108 }, { "epoch": 0.24, "learning_rate": 2.3053962308322602e-07, "logits/chosen": -3.217029333114624, "logits/rejected": -2.760246992111206, "logps/chosen": -170.1758575439453, "logps/rejected": -248.49200439453125, "loss": 0.5335, "rewards/accuracies": 0.75, "rewards/chosen": -0.15970605611801147, "rewards/margins": 2.787921905517578, "rewards/rejected": -2.9476280212402344, "step": 2109 }, { "epoch": 0.24, "learning_rate": 2.3050450661360178e-07, "logits/chosen": -2.86850643157959, "logits/rejected": -2.8641371726989746, "logps/chosen": -341.783447265625, "logps/rejected": -363.9459228515625, "loss": 0.6159, "rewards/accuracies": 0.75, "rewards/chosen": -0.4317886233329773, "rewards/margins": 1.6602858304977417, "rewards/rejected": -2.092074394226074, "step": 2110 }, { "epoch": 0.24, "learning_rate": 2.304693901439775e-07, "logits/chosen": -2.5138039588928223, "logits/rejected": -2.5743088722229004, "logps/chosen": -417.4977722167969, "logps/rejected": -359.70416259765625, "loss": 0.393, "rewards/accuracies": 0.875, "rewards/chosen": 0.03080321103334427, "rewards/margins": 1.373356580734253, "rewards/rejected": -1.3425533771514893, "step": 2111 }, { "epoch": 0.24, "learning_rate": 2.3043427367435326e-07, "logits/chosen": -2.5206520557403564, "logits/rejected": -2.533428192138672, "logps/chosen": -369.29827880859375, "logps/rejected": -283.76068115234375, "loss": 0.1784, "rewards/accuracies": 1.0, "rewards/chosen": 0.7609325051307678, "rewards/margins": 2.096494674682617, "rewards/rejected": -1.3355623483657837, "step": 2112 }, { "epoch": 0.24, "learning_rate": 2.30399157204729e-07, "logits/chosen": -3.4599108695983887, "logits/rejected": -3.4545326232910156, "logps/chosen": -190.36834716796875, "logps/rejected": -267.7120361328125, "loss": 0.2081, "rewards/accuracies": 0.875, "rewards/chosen": -0.46892499923706055, "rewards/margins": 2.933673858642578, "rewards/rejected": -3.4025988578796387, "step": 2113 }, { "epoch": 0.24, "learning_rate": 2.3036404073510474e-07, "logits/chosen": -3.257903814315796, "logits/rejected": -3.3460612297058105, "logps/chosen": -206.3463134765625, "logps/rejected": -219.52035522460938, "loss": 0.5451, "rewards/accuracies": 0.75, "rewards/chosen": 0.08126439899206161, "rewards/margins": 0.7224301099777222, "rewards/rejected": -0.6411657333374023, "step": 2114 }, { "epoch": 0.24, "learning_rate": 2.303289242654805e-07, "logits/chosen": -3.4247398376464844, "logits/rejected": -3.5034990310668945, "logps/chosen": -135.21697998046875, "logps/rejected": -168.78558349609375, "loss": 0.7413, "rewards/accuracies": 0.625, "rewards/chosen": -0.40579739212989807, "rewards/margins": 2.020038604736328, "rewards/rejected": -2.4258358478546143, "step": 2115 }, { "epoch": 0.24, "learning_rate": 2.3029380779585622e-07, "logits/chosen": -3.3096399307250977, "logits/rejected": -3.077197551727295, "logps/chosen": -410.85015869140625, "logps/rejected": -367.3661804199219, "loss": 0.2309, "rewards/accuracies": 1.0, "rewards/chosen": 0.39888066053390503, "rewards/margins": 1.8421542644500732, "rewards/rejected": -1.4432735443115234, "step": 2116 }, { "epoch": 0.24, "learning_rate": 2.30258691326232e-07, "logits/chosen": -3.084416389465332, "logits/rejected": -3.2645788192749023, "logps/chosen": -209.12745666503906, "logps/rejected": -261.8200988769531, "loss": 0.3381, "rewards/accuracies": 0.875, "rewards/chosen": -0.21082201600074768, "rewards/margins": 1.3201878070831299, "rewards/rejected": -1.5310097932815552, "step": 2117 }, { "epoch": 0.24, "learning_rate": 2.3022357485660776e-07, "logits/chosen": -3.0738563537597656, "logits/rejected": -2.97420072555542, "logps/chosen": -150.8150634765625, "logps/rejected": -225.51405334472656, "loss": 0.3607, "rewards/accuracies": 0.75, "rewards/chosen": -0.034167468547821045, "rewards/margins": 1.929117202758789, "rewards/rejected": -1.9632846117019653, "step": 2118 }, { "epoch": 0.24, "learning_rate": 2.3018845838698348e-07, "logits/chosen": -2.6215896606445312, "logits/rejected": -2.531309127807617, "logps/chosen": -325.0052490234375, "logps/rejected": -288.5998840332031, "loss": 0.3867, "rewards/accuracies": 0.75, "rewards/chosen": 0.17428207397460938, "rewards/margins": 1.0078134536743164, "rewards/rejected": -0.8335314393043518, "step": 2119 }, { "epoch": 0.24, "learning_rate": 2.3015334191735924e-07, "logits/chosen": -3.269624710083008, "logits/rejected": -3.0596323013305664, "logps/chosen": -446.0211486816406, "logps/rejected": -192.98489379882812, "loss": 0.3913, "rewards/accuracies": 0.75, "rewards/chosen": 0.5741363167762756, "rewards/margins": 1.5559260845184326, "rewards/rejected": -0.9817897081375122, "step": 2120 }, { "epoch": 0.24, "learning_rate": 2.30118225447735e-07, "logits/chosen": -3.132638454437256, "logits/rejected": -3.095646619796753, "logps/chosen": -306.4154968261719, "logps/rejected": -255.46347045898438, "loss": 0.5111, "rewards/accuracies": 0.75, "rewards/chosen": -0.4185672402381897, "rewards/margins": 1.0858190059661865, "rewards/rejected": -1.5043861865997314, "step": 2121 }, { "epoch": 0.24, "learning_rate": 2.3008310897811072e-07, "logits/chosen": -3.2063019275665283, "logits/rejected": -3.1988487243652344, "logps/chosen": -432.0948486328125, "logps/rejected": -280.7615966796875, "loss": 0.4917, "rewards/accuracies": 0.625, "rewards/chosen": 0.06945712864398956, "rewards/margins": 1.3280484676361084, "rewards/rejected": -1.2585911750793457, "step": 2122 }, { "epoch": 0.24, "learning_rate": 2.3004799250848647e-07, "logits/chosen": -2.981205463409424, "logits/rejected": -3.1691946983337402, "logps/chosen": -139.61550903320312, "logps/rejected": -225.16111755371094, "loss": 0.3533, "rewards/accuracies": 0.75, "rewards/chosen": 0.32160258293151855, "rewards/margins": 1.6840468645095825, "rewards/rejected": -1.362444281578064, "step": 2123 }, { "epoch": 0.24, "learning_rate": 2.300128760388622e-07, "logits/chosen": -3.129077911376953, "logits/rejected": -3.0811550617218018, "logps/chosen": -416.6397705078125, "logps/rejected": -304.04876708984375, "loss": 0.4072, "rewards/accuracies": 0.75, "rewards/chosen": 0.4374493658542633, "rewards/margins": 2.1136257648468018, "rewards/rejected": -1.6761763095855713, "step": 2124 }, { "epoch": 0.24, "learning_rate": 2.2997775956923795e-07, "logits/chosen": -3.1177971363067627, "logits/rejected": -3.068120002746582, "logps/chosen": -261.89117431640625, "logps/rejected": -259.0257568359375, "loss": 0.3158, "rewards/accuracies": 1.0, "rewards/chosen": -0.17802907526493073, "rewards/margins": 1.4171087741851807, "rewards/rejected": -1.5951377153396606, "step": 2125 }, { "epoch": 0.25, "learning_rate": 2.2994264309961373e-07, "logits/chosen": -3.05033802986145, "logits/rejected": -2.8604319095611572, "logps/chosen": -484.1619567871094, "logps/rejected": -488.71258544921875, "loss": 0.2581, "rewards/accuracies": 0.875, "rewards/chosen": -0.07930582761764526, "rewards/margins": 2.209566831588745, "rewards/rejected": -2.288872718811035, "step": 2126 }, { "epoch": 0.25, "learning_rate": 2.2990752662998943e-07, "logits/chosen": -3.3130016326904297, "logits/rejected": -3.182955741882324, "logps/chosen": -242.98472595214844, "logps/rejected": -311.72210693359375, "loss": 0.5221, "rewards/accuracies": 0.75, "rewards/chosen": 0.08628827333450317, "rewards/margins": 1.0533286333084106, "rewards/rejected": -0.9670404195785522, "step": 2127 }, { "epoch": 0.25, "learning_rate": 2.2987241016036521e-07, "logits/chosen": -2.757323741912842, "logits/rejected": -2.734171152114868, "logps/chosen": -228.89212036132812, "logps/rejected": -325.0506896972656, "loss": 0.45, "rewards/accuracies": 0.75, "rewards/chosen": 0.12936830520629883, "rewards/margins": 0.9810525178909302, "rewards/rejected": -0.8516842126846313, "step": 2128 }, { "epoch": 0.25, "learning_rate": 2.2983729369074097e-07, "logits/chosen": -2.6155567169189453, "logits/rejected": -2.8236327171325684, "logps/chosen": -230.96731567382812, "logps/rejected": -174.28579711914062, "loss": 0.3177, "rewards/accuracies": 0.75, "rewards/chosen": 0.15025682747364044, "rewards/margins": 1.6429872512817383, "rewards/rejected": -1.4927302598953247, "step": 2129 }, { "epoch": 0.25, "learning_rate": 2.298021772211167e-07, "logits/chosen": -2.4323248863220215, "logits/rejected": -2.529294013977051, "logps/chosen": -558.7015991210938, "logps/rejected": -296.4462890625, "loss": 0.3067, "rewards/accuracies": 0.875, "rewards/chosen": 0.6510332822799683, "rewards/margins": 1.8151662349700928, "rewards/rejected": -1.164132833480835, "step": 2130 }, { "epoch": 0.25, "learning_rate": 2.2976706075149245e-07, "logits/chosen": -3.9073331356048584, "logits/rejected": -4.021336555480957, "logps/chosen": -196.57164001464844, "logps/rejected": -162.32533264160156, "loss": 0.5155, "rewards/accuracies": 0.5, "rewards/chosen": 0.33393171429634094, "rewards/margins": 1.0446873903274536, "rewards/rejected": -0.7107555866241455, "step": 2131 }, { "epoch": 0.25, "learning_rate": 2.2973194428186818e-07, "logits/chosen": -3.0298361778259277, "logits/rejected": -3.081550121307373, "logps/chosen": -319.3509826660156, "logps/rejected": -316.0582275390625, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": -0.3999989628791809, "rewards/margins": 0.7974104881286621, "rewards/rejected": -1.1974093914031982, "step": 2132 }, { "epoch": 0.25, "learning_rate": 2.2969682781224393e-07, "logits/chosen": -2.8984832763671875, "logits/rejected": -2.7823643684387207, "logps/chosen": -254.94943237304688, "logps/rejected": -154.06027221679688, "loss": 0.8982, "rewards/accuracies": 0.25, "rewards/chosen": -0.48639917373657227, "rewards/margins": -0.02118399739265442, "rewards/rejected": -0.46521520614624023, "step": 2133 }, { "epoch": 0.25, "learning_rate": 2.2966171134261968e-07, "logits/chosen": -2.907137393951416, "logits/rejected": -2.778745651245117, "logps/chosen": -366.7825012207031, "logps/rejected": -330.40545654296875, "loss": 0.6041, "rewards/accuracies": 0.625, "rewards/chosen": 0.25338396430015564, "rewards/margins": 1.3865423202514648, "rewards/rejected": -1.1331582069396973, "step": 2134 }, { "epoch": 0.25, "learning_rate": 2.296265948729954e-07, "logits/chosen": -2.639051675796509, "logits/rejected": -2.8714752197265625, "logps/chosen": -328.53765869140625, "logps/rejected": -214.4018096923828, "loss": 0.7175, "rewards/accuracies": 0.5, "rewards/chosen": -0.2575297951698303, "rewards/margins": 0.008671015501022339, "rewards/rejected": -0.26620084047317505, "step": 2135 }, { "epoch": 0.25, "learning_rate": 2.2959147840337117e-07, "logits/chosen": -3.967879056930542, "logits/rejected": -3.6781632900238037, "logps/chosen": -294.2057189941406, "logps/rejected": -185.2998046875, "loss": 0.3645, "rewards/accuracies": 1.0, "rewards/chosen": 0.17556343972682953, "rewards/margins": 1.1041561365127563, "rewards/rejected": -0.9285928010940552, "step": 2136 }, { "epoch": 0.25, "learning_rate": 2.295563619337469e-07, "logits/chosen": -3.4967775344848633, "logits/rejected": -3.530625343322754, "logps/chosen": -292.8888244628906, "logps/rejected": -278.8633728027344, "loss": 0.3244, "rewards/accuracies": 0.875, "rewards/chosen": -0.02556251734495163, "rewards/margins": 1.678627610206604, "rewards/rejected": -1.7041900157928467, "step": 2137 }, { "epoch": 0.25, "learning_rate": 2.2952124546412265e-07, "logits/chosen": -3.174499988555908, "logits/rejected": -3.5422496795654297, "logps/chosen": -97.04664611816406, "logps/rejected": -201.9088592529297, "loss": 0.498, "rewards/accuracies": 0.75, "rewards/chosen": -0.26214560866355896, "rewards/margins": 1.7778913974761963, "rewards/rejected": -2.040037155151367, "step": 2138 }, { "epoch": 0.25, "learning_rate": 2.2948612899449843e-07, "logits/chosen": -3.057988405227661, "logits/rejected": -2.7497944831848145, "logps/chosen": -143.536865234375, "logps/rejected": -136.38714599609375, "loss": 0.3324, "rewards/accuracies": 0.875, "rewards/chosen": -0.36133721470832825, "rewards/margins": 1.2990679740905762, "rewards/rejected": -1.6604052782058716, "step": 2139 }, { "epoch": 0.25, "learning_rate": 2.2945101252487415e-07, "logits/chosen": -3.433320999145508, "logits/rejected": -3.2139768600463867, "logps/chosen": -195.34268188476562, "logps/rejected": -181.19691467285156, "loss": 0.6035, "rewards/accuracies": 0.75, "rewards/chosen": -0.10047314316034317, "rewards/margins": 0.6875261664390564, "rewards/rejected": -0.7879993319511414, "step": 2140 }, { "epoch": 0.25, "learning_rate": 2.294158960552499e-07, "logits/chosen": -3.0975584983825684, "logits/rejected": -2.981736660003662, "logps/chosen": -435.87939453125, "logps/rejected": -239.91470336914062, "loss": 0.3697, "rewards/accuracies": 0.625, "rewards/chosen": -0.030903533101081848, "rewards/margins": 1.337636947631836, "rewards/rejected": -1.3685402870178223, "step": 2141 }, { "epoch": 0.25, "learning_rate": 2.2938077958562566e-07, "logits/chosen": -2.9738893508911133, "logits/rejected": -2.558286190032959, "logps/chosen": -214.34483337402344, "logps/rejected": -250.37782287597656, "loss": 0.5872, "rewards/accuracies": 0.625, "rewards/chosen": -0.6212382912635803, "rewards/margins": 0.658962607383728, "rewards/rejected": -1.2802008390426636, "step": 2142 }, { "epoch": 0.25, "learning_rate": 2.293456631160014e-07, "logits/chosen": -3.1387698650360107, "logits/rejected": -3.114185333251953, "logps/chosen": -390.3049621582031, "logps/rejected": -325.4262390136719, "loss": 0.492, "rewards/accuracies": 0.75, "rewards/chosen": -0.07930421084165573, "rewards/margins": 1.013663649559021, "rewards/rejected": -1.0929678678512573, "step": 2143 }, { "epoch": 0.25, "learning_rate": 2.2931054664637714e-07, "logits/chosen": -3.1458261013031006, "logits/rejected": -3.1077966690063477, "logps/chosen": -311.1947326660156, "logps/rejected": -292.24249267578125, "loss": 0.2497, "rewards/accuracies": 1.0, "rewards/chosen": -0.018325001001358032, "rewards/margins": 2.187077760696411, "rewards/rejected": -2.2054026126861572, "step": 2144 }, { "epoch": 0.25, "learning_rate": 2.2927543017675287e-07, "logits/chosen": -2.2506399154663086, "logits/rejected": -2.366401433944702, "logps/chosen": -443.18243408203125, "logps/rejected": -272.187744140625, "loss": 0.3243, "rewards/accuracies": 0.75, "rewards/chosen": 0.285902202129364, "rewards/margins": 1.2258096933364868, "rewards/rejected": -0.9399075508117676, "step": 2145 }, { "epoch": 0.25, "learning_rate": 2.2924031370712863e-07, "logits/chosen": -3.9018802642822266, "logits/rejected": -3.819483995437622, "logps/chosen": -310.84307861328125, "logps/rejected": -324.6479187011719, "loss": 0.1897, "rewards/accuracies": 0.875, "rewards/chosen": 0.6406594514846802, "rewards/margins": 3.827509880065918, "rewards/rejected": -3.1868505477905273, "step": 2146 }, { "epoch": 0.25, "learning_rate": 2.2920519723750438e-07, "logits/chosen": -3.35387921333313, "logits/rejected": -3.498723268508911, "logps/chosen": -264.9539794921875, "logps/rejected": -240.6407470703125, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": -0.1976519525051117, "rewards/margins": 1.7366485595703125, "rewards/rejected": -1.934300422668457, "step": 2147 }, { "epoch": 0.25, "learning_rate": 2.291700807678801e-07, "logits/chosen": -2.9523720741271973, "logits/rejected": -3.0167789459228516, "logps/chosen": -221.262939453125, "logps/rejected": -213.5052490234375, "loss": 0.3951, "rewards/accuracies": 0.875, "rewards/chosen": -0.32022199034690857, "rewards/margins": 1.2957648038864136, "rewards/rejected": -1.6159868240356445, "step": 2148 }, { "epoch": 0.25, "learning_rate": 2.2913496429825586e-07, "logits/chosen": -3.675983428955078, "logits/rejected": -3.4586997032165527, "logps/chosen": -255.3311767578125, "logps/rejected": -206.3752899169922, "loss": 0.8866, "rewards/accuracies": 0.375, "rewards/chosen": -0.8855583667755127, "rewards/margins": -0.09528318047523499, "rewards/rejected": -0.7902751564979553, "step": 2149 }, { "epoch": 0.25, "learning_rate": 2.2909984782863164e-07, "logits/chosen": -3.1246275901794434, "logits/rejected": -2.9770803451538086, "logps/chosen": -306.7972106933594, "logps/rejected": -369.6507873535156, "loss": 0.2454, "rewards/accuracies": 1.0, "rewards/chosen": -0.05419187992811203, "rewards/margins": 1.6424165964126587, "rewards/rejected": -1.6966084241867065, "step": 2150 }, { "epoch": 0.25, "learning_rate": 2.2906473135900737e-07, "logits/chosen": -2.799499988555908, "logits/rejected": -2.881180763244629, "logps/chosen": -280.1490478515625, "logps/rejected": -216.60858154296875, "loss": 0.5795, "rewards/accuracies": 0.5, "rewards/chosen": -0.058351997286081314, "rewards/margins": 0.6957638263702393, "rewards/rejected": -0.754115879535675, "step": 2151 }, { "epoch": 0.25, "learning_rate": 2.2902961488938312e-07, "logits/chosen": -3.0426626205444336, "logits/rejected": -2.8782923221588135, "logps/chosen": -199.07327270507812, "logps/rejected": -176.74562072753906, "loss": 0.5265, "rewards/accuracies": 0.875, "rewards/chosen": -0.3715325593948364, "rewards/margins": 0.44901910424232483, "rewards/rejected": -0.8205517530441284, "step": 2152 }, { "epoch": 0.25, "learning_rate": 2.2899449841975885e-07, "logits/chosen": -3.3847169876098633, "logits/rejected": -3.440019130706787, "logps/chosen": -199.2718048095703, "logps/rejected": -242.8067626953125, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": 0.6725295186042786, "rewards/margins": 2.372034788131714, "rewards/rejected": -1.69950532913208, "step": 2153 }, { "epoch": 0.25, "learning_rate": 2.289593819501346e-07, "logits/chosen": -3.2121388912200928, "logits/rejected": -3.1259593963623047, "logps/chosen": -77.61213684082031, "logps/rejected": -260.663818359375, "loss": 0.2035, "rewards/accuracies": 1.0, "rewards/chosen": 0.5566335916519165, "rewards/margins": 2.4029579162597656, "rewards/rejected": -1.8463245630264282, "step": 2154 }, { "epoch": 0.25, "learning_rate": 2.2892426548051036e-07, "logits/chosen": -3.8060905933380127, "logits/rejected": -4.063305854797363, "logps/chosen": -96.44733428955078, "logps/rejected": -146.20587158203125, "loss": 0.2959, "rewards/accuracies": 1.0, "rewards/chosen": 0.15789060294628143, "rewards/margins": 1.3584789037704468, "rewards/rejected": -1.200588345527649, "step": 2155 }, { "epoch": 0.25, "learning_rate": 2.2888914901088608e-07, "logits/chosen": -3.04556941986084, "logits/rejected": -2.8953006267547607, "logps/chosen": -322.14044189453125, "logps/rejected": -291.58837890625, "loss": 0.3588, "rewards/accuracies": 0.75, "rewards/chosen": 0.05088639259338379, "rewards/margins": 1.2984827756881714, "rewards/rejected": -1.2475963830947876, "step": 2156 }, { "epoch": 0.25, "learning_rate": 2.2885403254126184e-07, "logits/chosen": -2.5830042362213135, "logits/rejected": -2.5951828956604004, "logps/chosen": -190.1724853515625, "logps/rejected": -248.76187133789062, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -0.5667991042137146, "rewards/margins": 0.7928729057312012, "rewards/rejected": -1.359671950340271, "step": 2157 }, { "epoch": 0.25, "learning_rate": 2.288189160716376e-07, "logits/chosen": -3.824737071990967, "logits/rejected": -3.4306282997131348, "logps/chosen": -316.813232421875, "logps/rejected": -212.4075469970703, "loss": 0.5233, "rewards/accuracies": 0.625, "rewards/chosen": 0.1164102703332901, "rewards/margins": 1.068872094154358, "rewards/rejected": -0.952461838722229, "step": 2158 }, { "epoch": 0.25, "learning_rate": 2.2878379960201332e-07, "logits/chosen": -2.5841102600097656, "logits/rejected": -2.3273773193359375, "logps/chosen": -341.92291259765625, "logps/rejected": -198.899658203125, "loss": 0.5798, "rewards/accuracies": 0.625, "rewards/chosen": -0.15881577134132385, "rewards/margins": 0.6533850431442261, "rewards/rejected": -0.8122007846832275, "step": 2159 }, { "epoch": 0.25, "learning_rate": 2.287486831323891e-07, "logits/chosen": -2.1588921546936035, "logits/rejected": -2.281071662902832, "logps/chosen": -338.48046875, "logps/rejected": -367.73541259765625, "loss": 0.3756, "rewards/accuracies": 0.875, "rewards/chosen": 0.6069843769073486, "rewards/margins": 1.4542444944381714, "rewards/rejected": -0.8472601175308228, "step": 2160 }, { "epoch": 0.25, "learning_rate": 2.287135666627648e-07, "logits/chosen": -3.171647310256958, "logits/rejected": -2.9164974689483643, "logps/chosen": -246.63812255859375, "logps/rejected": -244.59567260742188, "loss": 0.4204, "rewards/accuracies": 0.75, "rewards/chosen": -0.4366302490234375, "rewards/margins": 1.1840856075286865, "rewards/rejected": -1.6207157373428345, "step": 2161 }, { "epoch": 0.25, "learning_rate": 2.2867845019314058e-07, "logits/chosen": -3.344777822494507, "logits/rejected": -3.183298349380493, "logps/chosen": -380.3085632324219, "logps/rejected": -311.85821533203125, "loss": 0.5028, "rewards/accuracies": 0.875, "rewards/chosen": -0.21874305605888367, "rewards/margins": 1.9667285680770874, "rewards/rejected": -2.185471534729004, "step": 2162 }, { "epoch": 0.25, "learning_rate": 2.2864333372351633e-07, "logits/chosen": -3.1040329933166504, "logits/rejected": -2.866542100906372, "logps/chosen": -274.49066162109375, "logps/rejected": -221.03579711914062, "loss": 0.4141, "rewards/accuracies": 0.875, "rewards/chosen": 0.004150062799453735, "rewards/margins": 1.3637328147888184, "rewards/rejected": -1.359582781791687, "step": 2163 }, { "epoch": 0.25, "learning_rate": 2.2860821725389206e-07, "logits/chosen": -3.423794984817505, "logits/rejected": -3.181846857070923, "logps/chosen": -198.95223999023438, "logps/rejected": -207.7142791748047, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": -0.19943265616893768, "rewards/margins": 1.7241932153701782, "rewards/rejected": -1.9236259460449219, "step": 2164 }, { "epoch": 0.25, "learning_rate": 2.2857310078426782e-07, "logits/chosen": -3.3602797985076904, "logits/rejected": -3.5906894207000732, "logps/chosen": -374.57989501953125, "logps/rejected": -455.7204284667969, "loss": 0.2646, "rewards/accuracies": 1.0, "rewards/chosen": 0.3703863322734833, "rewards/margins": 1.5944361686706543, "rewards/rejected": -1.2240499258041382, "step": 2165 }, { "epoch": 0.25, "learning_rate": 2.2853798431464357e-07, "logits/chosen": -3.4440345764160156, "logits/rejected": -3.7901735305786133, "logps/chosen": -335.39263916015625, "logps/rejected": -258.5957946777344, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": 0.04834523797035217, "rewards/margins": 2.580047845840454, "rewards/rejected": -2.5317025184631348, "step": 2166 }, { "epoch": 0.25, "learning_rate": 2.285028678450193e-07, "logits/chosen": -2.877209424972534, "logits/rejected": -3.0024654865264893, "logps/chosen": -196.6121063232422, "logps/rejected": -253.6957244873047, "loss": 0.378, "rewards/accuracies": 0.75, "rewards/chosen": -0.32331985235214233, "rewards/margins": 1.4587980508804321, "rewards/rejected": -1.7821178436279297, "step": 2167 }, { "epoch": 0.25, "learning_rate": 2.2846775137539505e-07, "logits/chosen": -3.247687339782715, "logits/rejected": -2.576735734939575, "logps/chosen": -266.5221252441406, "logps/rejected": -134.5208282470703, "loss": 0.3538, "rewards/accuracies": 0.875, "rewards/chosen": 0.14202986657619476, "rewards/margins": 1.1841765642166138, "rewards/rejected": -1.0421466827392578, "step": 2168 }, { "epoch": 0.25, "learning_rate": 2.2843263490577078e-07, "logits/chosen": -3.1439571380615234, "logits/rejected": -3.1819911003112793, "logps/chosen": -293.97064208984375, "logps/rejected": -315.087646484375, "loss": 0.2163, "rewards/accuracies": 1.0, "rewards/chosen": 0.5528956651687622, "rewards/margins": 2.2351021766662598, "rewards/rejected": -1.682206630706787, "step": 2169 }, { "epoch": 0.25, "learning_rate": 2.2839751843614653e-07, "logits/chosen": -3.42669677734375, "logits/rejected": -3.2778358459472656, "logps/chosen": -297.4933776855469, "logps/rejected": -259.46990966796875, "loss": 0.4767, "rewards/accuracies": 0.875, "rewards/chosen": 0.07736729085445404, "rewards/margins": 1.8937163352966309, "rewards/rejected": -1.8163491487503052, "step": 2170 }, { "epoch": 0.25, "learning_rate": 2.283624019665223e-07, "logits/chosen": -3.3399159908294678, "logits/rejected": -2.733170509338379, "logps/chosen": -364.28509521484375, "logps/rejected": -177.2841796875, "loss": 0.3621, "rewards/accuracies": 0.625, "rewards/chosen": 0.13456812500953674, "rewards/margins": 1.6440664529800415, "rewards/rejected": -1.5094983577728271, "step": 2171 }, { "epoch": 0.25, "learning_rate": 2.2832728549689801e-07, "logits/chosen": -3.493020534515381, "logits/rejected": -3.351998805999756, "logps/chosen": -238.95553588867188, "logps/rejected": -194.61776733398438, "loss": 0.7171, "rewards/accuracies": 0.625, "rewards/chosen": -0.10810390114784241, "rewards/margins": 1.0185710191726685, "rewards/rejected": -1.1266748905181885, "step": 2172 }, { "epoch": 0.25, "learning_rate": 2.282921690272738e-07, "logits/chosen": -3.4962539672851562, "logits/rejected": -3.570619821548462, "logps/chosen": -276.0008544921875, "logps/rejected": -332.01263427734375, "loss": 0.2421, "rewards/accuracies": 1.0, "rewards/chosen": 0.5366507768630981, "rewards/margins": 1.9456170797348022, "rewards/rejected": -1.408966302871704, "step": 2173 }, { "epoch": 0.25, "learning_rate": 2.2825705255764955e-07, "logits/chosen": -3.6297452449798584, "logits/rejected": -3.4391212463378906, "logps/chosen": -169.07568359375, "logps/rejected": -153.61801147460938, "loss": 0.4104, "rewards/accuracies": 0.75, "rewards/chosen": 0.10558347404003143, "rewards/margins": 0.9694736003875732, "rewards/rejected": -0.8638901710510254, "step": 2174 }, { "epoch": 0.25, "learning_rate": 2.2822193608802528e-07, "logits/chosen": -3.5033671855926514, "logits/rejected": -3.094743251800537, "logps/chosen": -287.56732177734375, "logps/rejected": -252.23291015625, "loss": 0.5742, "rewards/accuracies": 0.75, "rewards/chosen": 0.0036702752113342285, "rewards/margins": 0.6941072940826416, "rewards/rejected": -0.6904370188713074, "step": 2175 }, { "epoch": 0.25, "learning_rate": 2.2818681961840103e-07, "logits/chosen": -3.6756601333618164, "logits/rejected": -3.496523380279541, "logps/chosen": -224.6309356689453, "logps/rejected": -249.92306518554688, "loss": 0.409, "rewards/accuracies": 0.75, "rewards/chosen": 0.14091336727142334, "rewards/margins": 1.437800407409668, "rewards/rejected": -1.2968870401382446, "step": 2176 }, { "epoch": 0.25, "learning_rate": 2.2815170314877676e-07, "logits/chosen": -2.996642589569092, "logits/rejected": -2.9101545810699463, "logps/chosen": -384.51800537109375, "logps/rejected": -260.63800048828125, "loss": 0.5525, "rewards/accuracies": 0.75, "rewards/chosen": 0.05160914361476898, "rewards/margins": 1.1540337800979614, "rewards/rejected": -1.1024246215820312, "step": 2177 }, { "epoch": 0.25, "learning_rate": 2.281165866791525e-07, "logits/chosen": -2.4927492141723633, "logits/rejected": -2.503787040710449, "logps/chosen": -275.6834411621094, "logps/rejected": -230.14968872070312, "loss": 0.5949, "rewards/accuracies": 0.75, "rewards/chosen": -0.6375506520271301, "rewards/margins": 0.836368203163147, "rewards/rejected": -1.4739189147949219, "step": 2178 }, { "epoch": 0.25, "learning_rate": 2.2808147020952826e-07, "logits/chosen": -3.007434844970703, "logits/rejected": -3.0320026874542236, "logps/chosen": -236.13670349121094, "logps/rejected": -146.66213989257812, "loss": 0.6096, "rewards/accuracies": 0.625, "rewards/chosen": -0.15751954913139343, "rewards/margins": 0.669103741645813, "rewards/rejected": -0.826623260974884, "step": 2179 }, { "epoch": 0.25, "learning_rate": 2.28046353739904e-07, "logits/chosen": -3.16267728805542, "logits/rejected": -3.1818904876708984, "logps/chosen": -456.2713928222656, "logps/rejected": -257.8637390136719, "loss": 0.2638, "rewards/accuracies": 0.875, "rewards/chosen": -0.06230699270963669, "rewards/margins": 1.9982186555862427, "rewards/rejected": -2.06052565574646, "step": 2180 }, { "epoch": 0.25, "learning_rate": 2.2801123727027975e-07, "logits/chosen": -3.335986852645874, "logits/rejected": -3.234585762023926, "logps/chosen": -168.18502807617188, "logps/rejected": -117.86390686035156, "loss": 0.6098, "rewards/accuracies": 0.75, "rewards/chosen": -0.25341537594795227, "rewards/margins": 0.2837415337562561, "rewards/rejected": -0.5371569395065308, "step": 2181 }, { "epoch": 0.25, "learning_rate": 2.2797612080065553e-07, "logits/chosen": -2.9828405380249023, "logits/rejected": -2.8815841674804688, "logps/chosen": -298.37127685546875, "logps/rejected": -332.8902282714844, "loss": 0.4075, "rewards/accuracies": 0.75, "rewards/chosen": 0.1333828866481781, "rewards/margins": 1.594447135925293, "rewards/rejected": -1.461064100265503, "step": 2182 }, { "epoch": 0.25, "learning_rate": 2.2794100433103123e-07, "logits/chosen": -3.875690460205078, "logits/rejected": -3.8597521781921387, "logps/chosen": -284.37811279296875, "logps/rejected": -309.10894775390625, "loss": 0.3848, "rewards/accuracies": 0.875, "rewards/chosen": 0.021378308534622192, "rewards/margins": 1.0725690126419067, "rewards/rejected": -1.0511906147003174, "step": 2183 }, { "epoch": 0.25, "learning_rate": 2.27905887861407e-07, "logits/chosen": -3.3483641147613525, "logits/rejected": -3.337801456451416, "logps/chosen": -143.1961212158203, "logps/rejected": -253.67123413085938, "loss": 0.3619, "rewards/accuracies": 0.75, "rewards/chosen": -0.15064403414726257, "rewards/margins": 2.736097574234009, "rewards/rejected": -2.886741876602173, "step": 2184 }, { "epoch": 0.25, "learning_rate": 2.2787077139178273e-07, "logits/chosen": -2.9544291496276855, "logits/rejected": -2.800509214401245, "logps/chosen": -252.32518005371094, "logps/rejected": -268.2479248046875, "loss": 0.3739, "rewards/accuracies": 0.875, "rewards/chosen": -0.573763370513916, "rewards/margins": 1.9583191871643066, "rewards/rejected": -2.5320825576782227, "step": 2185 }, { "epoch": 0.25, "learning_rate": 2.278356549221585e-07, "logits/chosen": -3.152986526489258, "logits/rejected": -2.855604410171509, "logps/chosen": -150.97015380859375, "logps/rejected": -128.8043212890625, "loss": 0.4921, "rewards/accuracies": 0.75, "rewards/chosen": -0.06456160545349121, "rewards/margins": 0.8390650153160095, "rewards/rejected": -0.903626561164856, "step": 2186 }, { "epoch": 0.25, "learning_rate": 2.2780053845253424e-07, "logits/chosen": -2.6811299324035645, "logits/rejected": -2.849144220352173, "logps/chosen": -174.7205047607422, "logps/rejected": -169.53009033203125, "loss": 1.0741, "rewards/accuracies": 0.625, "rewards/chosen": -0.6410802602767944, "rewards/margins": 0.19858889281749725, "rewards/rejected": -0.8396690487861633, "step": 2187 }, { "epoch": 0.25, "learning_rate": 2.2776542198290997e-07, "logits/chosen": -3.2585272789001465, "logits/rejected": -3.177778959274292, "logps/chosen": -498.5721130371094, "logps/rejected": -376.8558044433594, "loss": 0.6065, "rewards/accuracies": 0.625, "rewards/chosen": -0.6107594966888428, "rewards/margins": 0.8449984788894653, "rewards/rejected": -1.4557580947875977, "step": 2188 }, { "epoch": 0.25, "learning_rate": 2.2773030551328572e-07, "logits/chosen": -3.739912986755371, "logits/rejected": -3.3018579483032227, "logps/chosen": -233.6374969482422, "logps/rejected": -207.07614135742188, "loss": 0.5847, "rewards/accuracies": 0.5, "rewards/chosen": -0.15453240275382996, "rewards/margins": 0.39640113711357117, "rewards/rejected": -0.5509334802627563, "step": 2189 }, { "epoch": 0.25, "learning_rate": 2.2769518904366145e-07, "logits/chosen": -2.671588897705078, "logits/rejected": -2.4264400005340576, "logps/chosen": -292.240234375, "logps/rejected": -255.5592803955078, "loss": 0.8081, "rewards/accuracies": 0.5, "rewards/chosen": -0.31521016359329224, "rewards/margins": 0.46280139684677124, "rewards/rejected": -0.7780115604400635, "step": 2190 }, { "epoch": 0.25, "learning_rate": 2.276600725740372e-07, "logits/chosen": -2.468827247619629, "logits/rejected": -2.5742075443267822, "logps/chosen": -340.28753662109375, "logps/rejected": -389.3198547363281, "loss": 0.6655, "rewards/accuracies": 0.75, "rewards/chosen": -0.3011487126350403, "rewards/margins": 0.8277727365493774, "rewards/rejected": -1.128921389579773, "step": 2191 }, { "epoch": 0.25, "learning_rate": 2.2762495610441296e-07, "logits/chosen": -2.9728903770446777, "logits/rejected": -3.1461896896362305, "logps/chosen": -167.0803985595703, "logps/rejected": -184.75753784179688, "loss": 0.3716, "rewards/accuracies": 0.75, "rewards/chosen": 0.2924802899360657, "rewards/margins": 1.4994313716888428, "rewards/rejected": -1.2069510221481323, "step": 2192 }, { "epoch": 0.25, "learning_rate": 2.2758983963478869e-07, "logits/chosen": -2.8169963359832764, "logits/rejected": -2.824395179748535, "logps/chosen": -324.3594970703125, "logps/rejected": -242.78765869140625, "loss": 0.4923, "rewards/accuracies": 0.625, "rewards/chosen": -0.24145160615444183, "rewards/margins": 0.8665487766265869, "rewards/rejected": -1.10800039768219, "step": 2193 }, { "epoch": 0.25, "learning_rate": 2.2755472316516447e-07, "logits/chosen": -2.9910857677459717, "logits/rejected": -3.1291580200195312, "logps/chosen": -133.91082763671875, "logps/rejected": -339.8657531738281, "loss": 0.4806, "rewards/accuracies": 0.625, "rewards/chosen": 0.12414442002773285, "rewards/margins": 3.300455093383789, "rewards/rejected": -3.1763110160827637, "step": 2194 }, { "epoch": 0.25, "learning_rate": 2.2751960669554022e-07, "logits/chosen": -2.8426129817962646, "logits/rejected": -2.8806068897247314, "logps/chosen": -238.0952606201172, "logps/rejected": -184.66696166992188, "loss": 0.8204, "rewards/accuracies": 0.25, "rewards/chosen": -0.2021978795528412, "rewards/margins": 0.14173412322998047, "rewards/rejected": -0.34393200278282166, "step": 2195 }, { "epoch": 0.25, "learning_rate": 2.2748449022591595e-07, "logits/chosen": -3.8034210205078125, "logits/rejected": -3.7070631980895996, "logps/chosen": -257.9671325683594, "logps/rejected": -327.8367614746094, "loss": 0.2542, "rewards/accuracies": 1.0, "rewards/chosen": 0.21462735533714294, "rewards/margins": 2.504133462905884, "rewards/rejected": -2.289506435394287, "step": 2196 }, { "epoch": 0.25, "learning_rate": 2.274493737562917e-07, "logits/chosen": -2.9795875549316406, "logits/rejected": -2.9511022567749023, "logps/chosen": -386.87554931640625, "logps/rejected": -285.961669921875, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": 0.7351185083389282, "rewards/margins": 2.8124542236328125, "rewards/rejected": -2.0773355960845947, "step": 2197 }, { "epoch": 0.25, "learning_rate": 2.2741425728666743e-07, "logits/chosen": -3.3551764488220215, "logits/rejected": -3.2607264518737793, "logps/chosen": -228.473388671875, "logps/rejected": -244.93682861328125, "loss": 0.4245, "rewards/accuracies": 0.875, "rewards/chosen": 0.018006615340709686, "rewards/margins": 0.9778416156768799, "rewards/rejected": -0.9598349928855896, "step": 2198 }, { "epoch": 0.25, "learning_rate": 2.2737914081704318e-07, "logits/chosen": -3.5319674015045166, "logits/rejected": -3.5533721446990967, "logps/chosen": -96.42144775390625, "logps/rejected": -102.57795715332031, "loss": 0.4744, "rewards/accuracies": 0.875, "rewards/chosen": -0.1410767138004303, "rewards/margins": 0.5446161031723022, "rewards/rejected": -0.6856928467750549, "step": 2199 }, { "epoch": 0.25, "learning_rate": 2.2734402434741894e-07, "logits/chosen": -3.078167200088501, "logits/rejected": -3.207195997238159, "logps/chosen": -333.6822509765625, "logps/rejected": -255.9193572998047, "loss": 0.6359, "rewards/accuracies": 0.875, "rewards/chosen": -0.3659137487411499, "rewards/margins": 0.42312949895858765, "rewards/rejected": -0.7890431880950928, "step": 2200 }, { "epoch": 0.25, "learning_rate": 2.2730890787779466e-07, "logits/chosen": -2.517956018447876, "logits/rejected": -2.4445226192474365, "logps/chosen": -333.677490234375, "logps/rejected": -211.13037109375, "loss": 0.2447, "rewards/accuracies": 1.0, "rewards/chosen": 0.5625838041305542, "rewards/margins": 2.46097993850708, "rewards/rejected": -1.8983962535858154, "step": 2201 }, { "epoch": 0.25, "learning_rate": 2.2727379140817042e-07, "logits/chosen": -2.747490644454956, "logits/rejected": -2.8774454593658447, "logps/chosen": -257.6216125488281, "logps/rejected": -269.10150146484375, "loss": 0.4677, "rewards/accuracies": 0.75, "rewards/chosen": 0.3422894775867462, "rewards/margins": 0.6943532228469849, "rewards/rejected": -0.35206377506256104, "step": 2202 }, { "epoch": 0.25, "learning_rate": 2.2723867493854617e-07, "logits/chosen": -2.322866201400757, "logits/rejected": -2.334084987640381, "logps/chosen": -304.0146484375, "logps/rejected": -262.3143615722656, "loss": 0.6888, "rewards/accuracies": 0.75, "rewards/chosen": -0.3600270450115204, "rewards/margins": 0.4759736657142639, "rewards/rejected": -0.8360008001327515, "step": 2203 }, { "epoch": 0.25, "learning_rate": 2.272035584689219e-07, "logits/chosen": -2.68637752532959, "logits/rejected": -2.8301007747650146, "logps/chosen": -237.6531982421875, "logps/rejected": -224.08953857421875, "loss": 0.3836, "rewards/accuracies": 0.875, "rewards/chosen": 0.5830736756324768, "rewards/margins": 1.2394698858261108, "rewards/rejected": -0.6563961505889893, "step": 2204 }, { "epoch": 0.25, "learning_rate": 2.2716844199929768e-07, "logits/chosen": -3.033590793609619, "logits/rejected": -2.818540573120117, "logps/chosen": -273.6932067871094, "logps/rejected": -222.65040588378906, "loss": 0.681, "rewards/accuracies": 0.75, "rewards/chosen": -0.5942991375923157, "rewards/margins": 0.5739060044288635, "rewards/rejected": -1.1682050228118896, "step": 2205 }, { "epoch": 0.25, "learning_rate": 2.2713332552967338e-07, "logits/chosen": -3.183271646499634, "logits/rejected": -3.354708194732666, "logps/chosen": -194.7556610107422, "logps/rejected": -251.96849060058594, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": 0.06630364060401917, "rewards/margins": 2.051818370819092, "rewards/rejected": -1.985514521598816, "step": 2206 }, { "epoch": 0.25, "learning_rate": 2.2709820906004916e-07, "logits/chosen": -3.4493441581726074, "logits/rejected": -3.841705799102783, "logps/chosen": -117.37997436523438, "logps/rejected": -174.9344482421875, "loss": 0.3439, "rewards/accuracies": 0.875, "rewards/chosen": -0.19674226641654968, "rewards/margins": 1.5272915363311768, "rewards/rejected": -1.7240337133407593, "step": 2207 }, { "epoch": 0.25, "learning_rate": 2.2706309259042491e-07, "logits/chosen": -2.7928309440612793, "logits/rejected": -2.6430015563964844, "logps/chosen": -180.92123413085938, "logps/rejected": -255.7613525390625, "loss": 0.4399, "rewards/accuracies": 0.875, "rewards/chosen": 0.06936469674110413, "rewards/margins": 0.8699275851249695, "rewards/rejected": -0.800562858581543, "step": 2208 }, { "epoch": 0.25, "learning_rate": 2.2702797612080064e-07, "logits/chosen": -2.7166121006011963, "logits/rejected": -2.8603410720825195, "logps/chosen": -421.296875, "logps/rejected": -200.14544677734375, "loss": 0.5442, "rewards/accuracies": 0.625, "rewards/chosen": 0.2157871425151825, "rewards/margins": 0.5476203560829163, "rewards/rejected": -0.3318331837654114, "step": 2209 }, { "epoch": 0.25, "learning_rate": 2.269928596511764e-07, "logits/chosen": -3.432258129119873, "logits/rejected": -3.5610079765319824, "logps/chosen": -195.3793487548828, "logps/rejected": -168.77395629882812, "loss": 0.4232, "rewards/accuracies": 0.75, "rewards/chosen": 0.021966442465782166, "rewards/margins": 1.0210915803909302, "rewards/rejected": -0.9991251230239868, "step": 2210 }, { "epoch": 0.25, "learning_rate": 2.2695774318155215e-07, "logits/chosen": -2.771503448486328, "logits/rejected": -2.740757465362549, "logps/chosen": -325.5924377441406, "logps/rejected": -288.82373046875, "loss": 0.4106, "rewards/accuracies": 0.75, "rewards/chosen": -0.34828251600265503, "rewards/margins": 1.9800866842269897, "rewards/rejected": -2.328369140625, "step": 2211 }, { "epoch": 0.26, "learning_rate": 2.2692262671192788e-07, "logits/chosen": -3.233912467956543, "logits/rejected": -3.304896116256714, "logps/chosen": -110.72102355957031, "logps/rejected": -103.87687683105469, "loss": 0.7458, "rewards/accuracies": 0.5, "rewards/chosen": 0.014604642987251282, "rewards/margins": 0.4600673317909241, "rewards/rejected": -0.445462703704834, "step": 2212 }, { "epoch": 0.26, "learning_rate": 2.2688751024230363e-07, "logits/chosen": -2.416687488555908, "logits/rejected": -2.513040542602539, "logps/chosen": -161.75949096679688, "logps/rejected": -208.6759796142578, "loss": 0.6325, "rewards/accuracies": 0.375, "rewards/chosen": -0.26415586471557617, "rewards/margins": 0.599382758140564, "rewards/rejected": -0.8635387420654297, "step": 2213 }, { "epoch": 0.26, "learning_rate": 2.2685239377267936e-07, "logits/chosen": -2.7444801330566406, "logits/rejected": -3.098662853240967, "logps/chosen": -383.8021545410156, "logps/rejected": -271.0512390136719, "loss": 0.2592, "rewards/accuracies": 0.875, "rewards/chosen": 0.3276050090789795, "rewards/margins": 1.4762645959854126, "rewards/rejected": -1.148659586906433, "step": 2214 }, { "epoch": 0.26, "learning_rate": 2.268172773030551e-07, "logits/chosen": -3.567033290863037, "logits/rejected": -3.446316957473755, "logps/chosen": -503.92425537109375, "logps/rejected": -296.9543762207031, "loss": 0.3258, "rewards/accuracies": 0.875, "rewards/chosen": -0.13412217795848846, "rewards/margins": 1.6012651920318604, "rewards/rejected": -1.7353874444961548, "step": 2215 }, { "epoch": 0.26, "learning_rate": 2.267821608334309e-07, "logits/chosen": -2.9030933380126953, "logits/rejected": -2.9637844562530518, "logps/chosen": -498.39849853515625, "logps/rejected": -255.85308837890625, "loss": 0.4309, "rewards/accuracies": 1.0, "rewards/chosen": 0.7231199145317078, "rewards/margins": 0.7103961706161499, "rewards/rejected": 0.012723691761493683, "step": 2216 }, { "epoch": 0.26, "learning_rate": 2.267470443638066e-07, "logits/chosen": -3.359099864959717, "logits/rejected": -3.1479880809783936, "logps/chosen": -129.34469604492188, "logps/rejected": -171.88211059570312, "loss": 0.2768, "rewards/accuracies": 0.875, "rewards/chosen": -0.20070721209049225, "rewards/margins": 1.8131184577941895, "rewards/rejected": -2.0138256549835205, "step": 2217 }, { "epoch": 0.26, "learning_rate": 2.2671192789418237e-07, "logits/chosen": -3.5284810066223145, "logits/rejected": -3.7530899047851562, "logps/chosen": -111.2460708618164, "logps/rejected": -190.41525268554688, "loss": 0.5121, "rewards/accuracies": 0.5, "rewards/chosen": -0.06265842914581299, "rewards/margins": 1.574991226196289, "rewards/rejected": -1.6376495361328125, "step": 2218 }, { "epoch": 0.26, "learning_rate": 2.2667681142455813e-07, "logits/chosen": -2.9116177558898926, "logits/rejected": -3.0700182914733887, "logps/chosen": -259.44573974609375, "logps/rejected": -279.88458251953125, "loss": 0.2211, "rewards/accuracies": 1.0, "rewards/chosen": 0.4714348316192627, "rewards/margins": 2.3420186042785645, "rewards/rejected": -1.8705838918685913, "step": 2219 }, { "epoch": 0.26, "learning_rate": 2.2664169495493385e-07, "logits/chosen": -3.32549786567688, "logits/rejected": -3.1573853492736816, "logps/chosen": -429.72613525390625, "logps/rejected": -315.81298828125, "loss": 0.2518, "rewards/accuracies": 0.875, "rewards/chosen": -0.07049253582954407, "rewards/margins": 2.0963709354400635, "rewards/rejected": -2.1668636798858643, "step": 2220 }, { "epoch": 0.26, "learning_rate": 2.266065784853096e-07, "logits/chosen": -2.5333878993988037, "logits/rejected": -2.593008279800415, "logps/chosen": -248.7277069091797, "logps/rejected": -215.50869750976562, "loss": 0.4678, "rewards/accuracies": 0.75, "rewards/chosen": 0.0822727233171463, "rewards/margins": 1.0089495182037354, "rewards/rejected": -0.926676869392395, "step": 2221 }, { "epoch": 0.26, "learning_rate": 2.2657146201568534e-07, "logits/chosen": -3.3910741806030273, "logits/rejected": -3.4796977043151855, "logps/chosen": -275.5450134277344, "logps/rejected": -328.5380859375, "loss": 0.8617, "rewards/accuracies": 0.5, "rewards/chosen": -1.3655571937561035, "rewards/margins": 0.3967825770378113, "rewards/rejected": -1.7623398303985596, "step": 2222 }, { "epoch": 0.26, "learning_rate": 2.265363455460611e-07, "logits/chosen": -2.503227472305298, "logits/rejected": -2.6724305152893066, "logps/chosen": -259.76019287109375, "logps/rejected": -210.8515167236328, "loss": 0.2937, "rewards/accuracies": 1.0, "rewards/chosen": 0.16455549001693726, "rewards/margins": 1.9994087219238281, "rewards/rejected": -1.8348530530929565, "step": 2223 }, { "epoch": 0.26, "learning_rate": 2.2650122907643684e-07, "logits/chosen": -3.1582465171813965, "logits/rejected": -3.1813392639160156, "logps/chosen": -276.953369140625, "logps/rejected": -202.95489501953125, "loss": 0.3505, "rewards/accuracies": 0.75, "rewards/chosen": 0.10009332746267319, "rewards/margins": 1.81458580493927, "rewards/rejected": -1.7144925594329834, "step": 2224 }, { "epoch": 0.26, "learning_rate": 2.2646611260681257e-07, "logits/chosen": -3.238157272338867, "logits/rejected": -3.109389305114746, "logps/chosen": -327.6822509765625, "logps/rejected": -177.24972534179688, "loss": 0.4003, "rewards/accuracies": 0.75, "rewards/chosen": 0.2721123695373535, "rewards/margins": 1.6343474388122559, "rewards/rejected": -1.3622353076934814, "step": 2225 }, { "epoch": 0.26, "learning_rate": 2.2643099613718832e-07, "logits/chosen": -3.5098376274108887, "logits/rejected": -3.6221776008605957, "logps/chosen": -275.2200927734375, "logps/rejected": -207.06979370117188, "loss": 0.3724, "rewards/accuracies": 0.875, "rewards/chosen": -0.25513190031051636, "rewards/margins": 1.1890825033187866, "rewards/rejected": -1.4442143440246582, "step": 2226 }, { "epoch": 0.26, "learning_rate": 2.263958796675641e-07, "logits/chosen": -2.8032596111297607, "logits/rejected": -2.477343797683716, "logps/chosen": -318.05938720703125, "logps/rejected": -340.1149597167969, "loss": 0.6735, "rewards/accuracies": 0.5, "rewards/chosen": -0.21060171723365784, "rewards/margins": 0.6630598306655884, "rewards/rejected": -0.8736615777015686, "step": 2227 }, { "epoch": 0.26, "learning_rate": 2.2636076319793983e-07, "logits/chosen": -3.5269153118133545, "logits/rejected": -3.714320659637451, "logps/chosen": -229.12062072753906, "logps/rejected": -323.8396911621094, "loss": 0.2625, "rewards/accuracies": 0.75, "rewards/chosen": -0.08395881205797195, "rewards/margins": 3.221024513244629, "rewards/rejected": -3.304983139038086, "step": 2228 }, { "epoch": 0.26, "learning_rate": 2.2632564672831559e-07, "logits/chosen": -2.9582176208496094, "logits/rejected": -3.087999105453491, "logps/chosen": -243.1240234375, "logps/rejected": -205.3972625732422, "loss": 0.3026, "rewards/accuracies": 0.875, "rewards/chosen": 0.5497586727142334, "rewards/margins": 2.2877979278564453, "rewards/rejected": -1.7380391359329224, "step": 2229 }, { "epoch": 0.26, "learning_rate": 2.2629053025869131e-07, "logits/chosen": -3.924192428588867, "logits/rejected": -3.7524094581604004, "logps/chosen": -207.98471069335938, "logps/rejected": -190.6427001953125, "loss": 0.5701, "rewards/accuracies": 0.625, "rewards/chosen": -1.2102999687194824, "rewards/margins": 0.8181689381599426, "rewards/rejected": -2.0284688472747803, "step": 2230 }, { "epoch": 0.26, "learning_rate": 2.2625541378906707e-07, "logits/chosen": -3.1217308044433594, "logits/rejected": -3.0200002193450928, "logps/chosen": -219.8317413330078, "logps/rejected": -128.22268676757812, "loss": 0.3917, "rewards/accuracies": 0.875, "rewards/chosen": -0.06199701130390167, "rewards/margins": 0.9160330891609192, "rewards/rejected": -0.9780300855636597, "step": 2231 }, { "epoch": 0.26, "learning_rate": 2.2622029731944282e-07, "logits/chosen": -3.3196372985839844, "logits/rejected": -3.142343759536743, "logps/chosen": -181.75997924804688, "logps/rejected": -197.08424377441406, "loss": 0.5066, "rewards/accuracies": 0.625, "rewards/chosen": -0.4157918393611908, "rewards/margins": 1.1415128707885742, "rewards/rejected": -1.5573046207427979, "step": 2232 }, { "epoch": 0.26, "learning_rate": 2.2618518084981855e-07, "logits/chosen": -2.6792635917663574, "logits/rejected": -2.5943212509155273, "logps/chosen": -380.5477294921875, "logps/rejected": -300.2999572753906, "loss": 0.5525, "rewards/accuracies": 0.625, "rewards/chosen": -0.7593969702720642, "rewards/margins": 0.9040735960006714, "rewards/rejected": -1.6634706258773804, "step": 2233 }, { "epoch": 0.26, "learning_rate": 2.261500643801943e-07, "logits/chosen": -2.836219310760498, "logits/rejected": -2.73982834815979, "logps/chosen": -322.259033203125, "logps/rejected": -303.5791015625, "loss": 0.8128, "rewards/accuracies": 0.5, "rewards/chosen": -0.33855730295181274, "rewards/margins": 0.0060512349009513855, "rewards/rejected": -0.34460848569869995, "step": 2234 }, { "epoch": 0.26, "learning_rate": 2.2611494791057003e-07, "logits/chosen": -2.683610439300537, "logits/rejected": -2.830195188522339, "logps/chosen": -375.2545166015625, "logps/rejected": -368.57177734375, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": 0.74430251121521, "rewards/margins": 1.4760124683380127, "rewards/rejected": -0.7317099571228027, "step": 2235 }, { "epoch": 0.26, "learning_rate": 2.2607983144094578e-07, "logits/chosen": -3.5386037826538086, "logits/rejected": -4.169234275817871, "logps/chosen": -247.97384643554688, "logps/rejected": -352.4546813964844, "loss": 0.5946, "rewards/accuracies": 0.75, "rewards/chosen": -0.2025674432516098, "rewards/margins": 1.3374712467193604, "rewards/rejected": -1.5400387048721313, "step": 2236 }, { "epoch": 0.26, "learning_rate": 2.2604471497132154e-07, "logits/chosen": -3.09023380279541, "logits/rejected": -3.0649635791778564, "logps/chosen": -211.0482635498047, "logps/rejected": -278.8951721191406, "loss": 0.3222, "rewards/accuracies": 0.875, "rewards/chosen": 0.08305928111076355, "rewards/margins": 1.8281651735305786, "rewards/rejected": -1.7451059818267822, "step": 2237 }, { "epoch": 0.26, "learning_rate": 2.2600959850169727e-07, "logits/chosen": -2.829986095428467, "logits/rejected": -3.001282215118408, "logps/chosen": -335.6710205078125, "logps/rejected": -313.0057678222656, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": -0.1497618705034256, "rewards/margins": 1.4237704277038574, "rewards/rejected": -1.573532223701477, "step": 2238 }, { "epoch": 0.26, "learning_rate": 2.2597448203207305e-07, "logits/chosen": -3.5548465251922607, "logits/rejected": -3.7334554195404053, "logps/chosen": -311.49920654296875, "logps/rejected": -268.40765380859375, "loss": 0.32, "rewards/accuracies": 0.875, "rewards/chosen": 0.05918455868959427, "rewards/margins": 1.4807531833648682, "rewards/rejected": -1.4215686321258545, "step": 2239 }, { "epoch": 0.26, "learning_rate": 2.259393655624488e-07, "logits/chosen": -3.6163840293884277, "logits/rejected": -3.726585865020752, "logps/chosen": -179.51388549804688, "logps/rejected": -156.46107482910156, "loss": 0.3629, "rewards/accuracies": 0.625, "rewards/chosen": 0.5495415925979614, "rewards/margins": 1.4631555080413818, "rewards/rejected": -0.9136137962341309, "step": 2240 }, { "epoch": 0.26, "learning_rate": 2.2590424909282453e-07, "logits/chosen": -4.048492431640625, "logits/rejected": -3.6627912521362305, "logps/chosen": -275.87420654296875, "logps/rejected": -234.1333465576172, "loss": 0.5873, "rewards/accuracies": 0.5, "rewards/chosen": -0.23903483152389526, "rewards/margins": 0.8965874910354614, "rewards/rejected": -1.135622262954712, "step": 2241 }, { "epoch": 0.26, "learning_rate": 2.2586913262320028e-07, "logits/chosen": -2.667475938796997, "logits/rejected": -2.7740554809570312, "logps/chosen": -154.42405700683594, "logps/rejected": -212.1448211669922, "loss": 0.3884, "rewards/accuracies": 0.75, "rewards/chosen": 0.041147418320178986, "rewards/margins": 1.4833874702453613, "rewards/rejected": -1.4422399997711182, "step": 2242 }, { "epoch": 0.26, "learning_rate": 2.25834016153576e-07, "logits/chosen": -2.6408472061157227, "logits/rejected": -2.5702755451202393, "logps/chosen": -334.9706115722656, "logps/rejected": -355.46429443359375, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": -0.1447509378194809, "rewards/margins": 1.4408013820648193, "rewards/rejected": -1.5855523347854614, "step": 2243 }, { "epoch": 0.26, "learning_rate": 2.2579889968395176e-07, "logits/chosen": -3.1651813983917236, "logits/rejected": -3.2105979919433594, "logps/chosen": -300.7978515625, "logps/rejected": -302.9718322753906, "loss": 0.6221, "rewards/accuracies": 0.625, "rewards/chosen": 0.3748359680175781, "rewards/margins": 1.004964828491211, "rewards/rejected": -0.6301287412643433, "step": 2244 }, { "epoch": 0.26, "learning_rate": 2.2576378321432752e-07, "logits/chosen": -3.193474531173706, "logits/rejected": -3.421196699142456, "logps/chosen": -130.29763793945312, "logps/rejected": -188.87966918945312, "loss": 0.3667, "rewards/accuracies": 0.875, "rewards/chosen": 0.890293300151825, "rewards/margins": 1.6719597578048706, "rewards/rejected": -0.7816663980484009, "step": 2245 }, { "epoch": 0.26, "learning_rate": 2.2572866674470324e-07, "logits/chosen": -4.204399108886719, "logits/rejected": -3.5902903079986572, "logps/chosen": -243.09902954101562, "logps/rejected": -157.76229858398438, "loss": 0.506, "rewards/accuracies": 0.75, "rewards/chosen": -0.264714777469635, "rewards/margins": 1.1124815940856934, "rewards/rejected": -1.3771963119506836, "step": 2246 }, { "epoch": 0.26, "learning_rate": 2.25693550275079e-07, "logits/chosen": -3.070166826248169, "logits/rejected": -2.9078359603881836, "logps/chosen": -335.2261962890625, "logps/rejected": -293.58782958984375, "loss": 0.094, "rewards/accuracies": 1.0, "rewards/chosen": 0.9442602396011353, "rewards/margins": 2.633467674255371, "rewards/rejected": -1.6892074346542358, "step": 2247 }, { "epoch": 0.26, "learning_rate": 2.2565843380545475e-07, "logits/chosen": -3.242949962615967, "logits/rejected": -3.335228681564331, "logps/chosen": -332.5345458984375, "logps/rejected": -274.72711181640625, "loss": 0.221, "rewards/accuracies": 1.0, "rewards/chosen": 0.15956228971481323, "rewards/margins": 2.2111854553222656, "rewards/rejected": -2.0516231060028076, "step": 2248 }, { "epoch": 0.26, "learning_rate": 2.2562331733583048e-07, "logits/chosen": -2.97841739654541, "logits/rejected": -2.8605456352233887, "logps/chosen": -289.9798278808594, "logps/rejected": -238.18922424316406, "loss": 0.2724, "rewards/accuracies": 0.875, "rewards/chosen": 0.5521097779273987, "rewards/margins": 2.0496795177459717, "rewards/rejected": -1.4975697994232178, "step": 2249 }, { "epoch": 0.26, "learning_rate": 2.2558820086620626e-07, "logits/chosen": -3.384396553039551, "logits/rejected": -3.1368472576141357, "logps/chosen": -283.4202575683594, "logps/rejected": -179.10008239746094, "loss": 0.6747, "rewards/accuracies": 0.625, "rewards/chosen": -0.10693298280239105, "rewards/margins": 0.4967597424983978, "rewards/rejected": -0.6036927103996277, "step": 2250 }, { "epoch": 0.26, "learning_rate": 2.2555308439658196e-07, "logits/chosen": -3.3267688751220703, "logits/rejected": -3.4959793090820312, "logps/chosen": -275.69586181640625, "logps/rejected": -225.05133056640625, "loss": 0.4673, "rewards/accuracies": 0.625, "rewards/chosen": -0.4311789572238922, "rewards/margins": 1.9876725673675537, "rewards/rejected": -2.418851613998413, "step": 2251 }, { "epoch": 0.26, "learning_rate": 2.2551796792695774e-07, "logits/chosen": -3.4552927017211914, "logits/rejected": -3.367327928543091, "logps/chosen": -499.72039794921875, "logps/rejected": -357.138671875, "loss": 0.5066, "rewards/accuracies": 0.75, "rewards/chosen": 0.12537826597690582, "rewards/margins": 1.9460505247116089, "rewards/rejected": -1.8206722736358643, "step": 2252 }, { "epoch": 0.26, "learning_rate": 2.254828514573335e-07, "logits/chosen": -2.8800249099731445, "logits/rejected": -3.164947748184204, "logps/chosen": -136.47613525390625, "logps/rejected": -227.69471740722656, "loss": 0.3149, "rewards/accuracies": 0.875, "rewards/chosen": -0.10136361420154572, "rewards/margins": 1.9796645641326904, "rewards/rejected": -2.0810279846191406, "step": 2253 }, { "epoch": 0.26, "learning_rate": 2.2544773498770922e-07, "logits/chosen": -2.4175760746002197, "logits/rejected": -2.373337507247925, "logps/chosen": -337.4205322265625, "logps/rejected": -283.81549072265625, "loss": 0.4172, "rewards/accuracies": 0.625, "rewards/chosen": 0.2961743474006653, "rewards/margins": 1.4359197616577148, "rewards/rejected": -1.1397454738616943, "step": 2254 }, { "epoch": 0.26, "learning_rate": 2.2541261851808497e-07, "logits/chosen": -3.231973171234131, "logits/rejected": -2.943213939666748, "logps/chosen": -326.1588439941406, "logps/rejected": -273.28497314453125, "loss": 0.4713, "rewards/accuracies": 0.625, "rewards/chosen": 0.14142769575119019, "rewards/margins": 2.073690891265869, "rewards/rejected": -1.9322631359100342, "step": 2255 }, { "epoch": 0.26, "learning_rate": 2.2537750204846073e-07, "logits/chosen": -2.9443790912628174, "logits/rejected": -2.9598214626312256, "logps/chosen": -489.3149719238281, "logps/rejected": -478.10638427734375, "loss": 0.6087, "rewards/accuracies": 0.5, "rewards/chosen": -0.558492124080658, "rewards/margins": 1.161177396774292, "rewards/rejected": -1.7196694612503052, "step": 2256 }, { "epoch": 0.26, "learning_rate": 2.2534238557883646e-07, "logits/chosen": -3.1329293251037598, "logits/rejected": -3.4967658519744873, "logps/chosen": -275.5706787109375, "logps/rejected": -271.963623046875, "loss": 0.512, "rewards/accuracies": 0.625, "rewards/chosen": -0.8178631663322449, "rewards/margins": 2.155769109725952, "rewards/rejected": -2.9736320972442627, "step": 2257 }, { "epoch": 0.26, "learning_rate": 2.253072691092122e-07, "logits/chosen": -3.827704429626465, "logits/rejected": -3.7706682682037354, "logps/chosen": -213.4423370361328, "logps/rejected": -269.92901611328125, "loss": 0.3274, "rewards/accuracies": 0.75, "rewards/chosen": -0.05744795501232147, "rewards/margins": 1.8602038621902466, "rewards/rejected": -1.9176517724990845, "step": 2258 }, { "epoch": 0.26, "learning_rate": 2.2527215263958794e-07, "logits/chosen": -2.552145004272461, "logits/rejected": -2.794638156890869, "logps/chosen": -309.59417724609375, "logps/rejected": -250.362060546875, "loss": 0.4627, "rewards/accuracies": 0.875, "rewards/chosen": -0.3958032727241516, "rewards/margins": 1.2265560626983643, "rewards/rejected": -1.6223593950271606, "step": 2259 }, { "epoch": 0.26, "learning_rate": 2.252370361699637e-07, "logits/chosen": -3.6009974479675293, "logits/rejected": -3.0541000366210938, "logps/chosen": -328.2898254394531, "logps/rejected": -250.28208923339844, "loss": 0.269, "rewards/accuracies": 1.0, "rewards/chosen": 0.4292284846305847, "rewards/margins": 1.7118525505065918, "rewards/rejected": -1.2826241254806519, "step": 2260 }, { "epoch": 0.26, "learning_rate": 2.2520191970033947e-07, "logits/chosen": -2.3224234580993652, "logits/rejected": -2.2000749111175537, "logps/chosen": -483.48291015625, "logps/rejected": -439.1147155761719, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": 0.44971317052841187, "rewards/margins": 1.9741356372833252, "rewards/rejected": -1.5244224071502686, "step": 2261 }, { "epoch": 0.26, "learning_rate": 2.2516680323071517e-07, "logits/chosen": -3.180293560028076, "logits/rejected": -2.922402858734131, "logps/chosen": -162.604248046875, "logps/rejected": -226.42010498046875, "loss": 0.3257, "rewards/accuracies": 0.75, "rewards/chosen": 0.3010476231575012, "rewards/margins": 2.251763343811035, "rewards/rejected": -1.9507155418395996, "step": 2262 }, { "epoch": 0.26, "learning_rate": 2.2513168676109095e-07, "logits/chosen": -3.921855926513672, "logits/rejected": -3.881495475769043, "logps/chosen": -288.08709716796875, "logps/rejected": -198.5615234375, "loss": 0.5503, "rewards/accuracies": 0.75, "rewards/chosen": -0.6627322435379028, "rewards/margins": 1.1447124481201172, "rewards/rejected": -1.8074445724487305, "step": 2263 }, { "epoch": 0.26, "learning_rate": 2.250965702914667e-07, "logits/chosen": -3.5100176334381104, "logits/rejected": -3.2538137435913086, "logps/chosen": -304.4443054199219, "logps/rejected": -280.97332763671875, "loss": 0.6767, "rewards/accuracies": 0.625, "rewards/chosen": -0.4202141761779785, "rewards/margins": 0.7322701215744019, "rewards/rejected": -1.1524842977523804, "step": 2264 }, { "epoch": 0.26, "learning_rate": 2.2506145382184243e-07, "logits/chosen": -3.3186898231506348, "logits/rejected": -3.2877187728881836, "logps/chosen": -203.36044311523438, "logps/rejected": -159.20184326171875, "loss": 0.4219, "rewards/accuracies": 0.875, "rewards/chosen": 0.18551760911941528, "rewards/margins": 1.2251909971237183, "rewards/rejected": -1.0396734476089478, "step": 2265 }, { "epoch": 0.26, "learning_rate": 2.250263373522182e-07, "logits/chosen": -3.026066780090332, "logits/rejected": -3.175785779953003, "logps/chosen": -175.00706481933594, "logps/rejected": -174.04225158691406, "loss": 0.2577, "rewards/accuracies": 1.0, "rewards/chosen": 0.3849083483219147, "rewards/margins": 1.82658052444458, "rewards/rejected": -1.4416720867156982, "step": 2266 }, { "epoch": 0.26, "learning_rate": 2.2499122088259392e-07, "logits/chosen": -3.1592414379119873, "logits/rejected": -3.0226528644561768, "logps/chosen": -234.56674194335938, "logps/rejected": -240.3091583251953, "loss": 0.2013, "rewards/accuracies": 1.0, "rewards/chosen": 0.42934906482696533, "rewards/margins": 2.0805978775024414, "rewards/rejected": -1.6512489318847656, "step": 2267 }, { "epoch": 0.26, "learning_rate": 2.2495610441296967e-07, "logits/chosen": -3.399369716644287, "logits/rejected": -3.2219815254211426, "logps/chosen": -208.96234130859375, "logps/rejected": -252.11996459960938, "loss": 0.7089, "rewards/accuracies": 0.625, "rewards/chosen": -0.4053388237953186, "rewards/margins": 0.9835020303726196, "rewards/rejected": -1.388840675354004, "step": 2268 }, { "epoch": 0.26, "learning_rate": 2.2492098794334542e-07, "logits/chosen": -3.368628978729248, "logits/rejected": -3.531409978866577, "logps/chosen": -316.14813232421875, "logps/rejected": -269.9251403808594, "loss": 0.2962, "rewards/accuracies": 1.0, "rewards/chosen": 0.29628127813339233, "rewards/margins": 1.735966444015503, "rewards/rejected": -1.4396851062774658, "step": 2269 }, { "epoch": 0.26, "learning_rate": 2.2488587147372115e-07, "logits/chosen": -3.462883949279785, "logits/rejected": -3.572935104370117, "logps/chosen": -182.82998657226562, "logps/rejected": -194.19821166992188, "loss": 0.2686, "rewards/accuracies": 0.875, "rewards/chosen": 0.3008688688278198, "rewards/margins": 1.8331443071365356, "rewards/rejected": -1.5322753190994263, "step": 2270 }, { "epoch": 0.26, "learning_rate": 2.248507550040969e-07, "logits/chosen": -3.124620199203491, "logits/rejected": -3.184330940246582, "logps/chosen": -316.53289794921875, "logps/rejected": -246.68612670898438, "loss": 0.3911, "rewards/accuracies": 0.875, "rewards/chosen": -0.1311386078596115, "rewards/margins": 1.0782136917114258, "rewards/rejected": -1.2093522548675537, "step": 2271 }, { "epoch": 0.26, "learning_rate": 2.2481563853447268e-07, "logits/chosen": -3.1124401092529297, "logits/rejected": -3.0829367637634277, "logps/chosen": -248.4344940185547, "logps/rejected": -163.2216796875, "loss": 0.4247, "rewards/accuracies": 0.875, "rewards/chosen": 0.027779266238212585, "rewards/margins": 0.9924795627593994, "rewards/rejected": -0.9647003412246704, "step": 2272 }, { "epoch": 0.26, "learning_rate": 2.247805220648484e-07, "logits/chosen": -3.8498446941375732, "logits/rejected": -3.9508254528045654, "logps/chosen": -450.53851318359375, "logps/rejected": -388.12445068359375, "loss": 0.8231, "rewards/accuracies": 0.875, "rewards/chosen": 0.08393092453479767, "rewards/margins": 0.8466235399246216, "rewards/rejected": -0.7626925706863403, "step": 2273 }, { "epoch": 0.26, "learning_rate": 2.2474540559522417e-07, "logits/chosen": -2.8272480964660645, "logits/rejected": -2.8338663578033447, "logps/chosen": -207.17022705078125, "logps/rejected": -241.3815155029297, "loss": 0.2304, "rewards/accuracies": 1.0, "rewards/chosen": 0.3198411464691162, "rewards/margins": 1.5751274824142456, "rewards/rejected": -1.2552860975265503, "step": 2274 }, { "epoch": 0.26, "learning_rate": 2.247102891255999e-07, "logits/chosen": -3.4083895683288574, "logits/rejected": -3.4066579341888428, "logps/chosen": -254.93682861328125, "logps/rejected": -250.876708984375, "loss": 0.3784, "rewards/accuracies": 0.75, "rewards/chosen": -0.33506911993026733, "rewards/margins": 2.063014507293701, "rewards/rejected": -2.3980836868286133, "step": 2275 }, { "epoch": 0.26, "learning_rate": 2.2467517265597565e-07, "logits/chosen": -3.3835816383361816, "logits/rejected": -3.297212600708008, "logps/chosen": -236.33331298828125, "logps/rejected": -230.04714965820312, "loss": 0.6339, "rewards/accuracies": 0.625, "rewards/chosen": 0.006941929459571838, "rewards/margins": 0.6138090491294861, "rewards/rejected": -0.6068670749664307, "step": 2276 }, { "epoch": 0.26, "learning_rate": 2.246400561863514e-07, "logits/chosen": -3.1519381999969482, "logits/rejected": -3.1097850799560547, "logps/chosen": -264.0862731933594, "logps/rejected": -244.04307556152344, "loss": 0.6354, "rewards/accuracies": 0.75, "rewards/chosen": -0.42900213599205017, "rewards/margins": 0.920783281326294, "rewards/rejected": -1.3497854471206665, "step": 2277 }, { "epoch": 0.26, "learning_rate": 2.2460493971672713e-07, "logits/chosen": -3.354160785675049, "logits/rejected": -3.571348190307617, "logps/chosen": -108.08409881591797, "logps/rejected": -188.0736846923828, "loss": 0.5215, "rewards/accuracies": 0.625, "rewards/chosen": -0.40679460763931274, "rewards/margins": 1.5039973258972168, "rewards/rejected": -1.9107921123504639, "step": 2278 }, { "epoch": 0.26, "learning_rate": 2.2456982324710288e-07, "logits/chosen": -2.8309545516967773, "logits/rejected": -2.372727632522583, "logps/chosen": -108.12954711914062, "logps/rejected": -144.30599975585938, "loss": 0.6019, "rewards/accuracies": 0.875, "rewards/chosen": -0.49348244071006775, "rewards/margins": 0.5499218702316284, "rewards/rejected": -1.043404221534729, "step": 2279 }, { "epoch": 0.26, "learning_rate": 2.245347067774786e-07, "logits/chosen": -3.8833813667297363, "logits/rejected": -3.5503346920013428, "logps/chosen": -541.40380859375, "logps/rejected": -204.13046264648438, "loss": 0.29, "rewards/accuracies": 1.0, "rewards/chosen": -0.14965233206748962, "rewards/margins": 1.7203710079193115, "rewards/rejected": -1.870023488998413, "step": 2280 }, { "epoch": 0.26, "learning_rate": 2.2449959030785436e-07, "logits/chosen": -3.4936163425445557, "logits/rejected": -3.4844467639923096, "logps/chosen": -168.93914794921875, "logps/rejected": -158.01846313476562, "loss": 0.499, "rewards/accuracies": 0.625, "rewards/chosen": -0.01446482539176941, "rewards/margins": 1.1886029243469238, "rewards/rejected": -1.2030677795410156, "step": 2281 }, { "epoch": 0.26, "learning_rate": 2.2446447383823012e-07, "logits/chosen": -3.4351913928985596, "logits/rejected": -3.2744905948638916, "logps/chosen": -238.66021728515625, "logps/rejected": -289.5440979003906, "loss": 0.382, "rewards/accuracies": 0.875, "rewards/chosen": -0.09510049223899841, "rewards/margins": 1.0728797912597656, "rewards/rejected": -1.1679801940917969, "step": 2282 }, { "epoch": 0.26, "learning_rate": 2.2442935736860584e-07, "logits/chosen": -3.14445424079895, "logits/rejected": -2.8833515644073486, "logps/chosen": -280.6742248535156, "logps/rejected": -278.46588134765625, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": 0.6236699819564819, "rewards/margins": 2.594407796859741, "rewards/rejected": -1.9707375764846802, "step": 2283 }, { "epoch": 0.26, "learning_rate": 2.2439424089898162e-07, "logits/chosen": -3.1564388275146484, "logits/rejected": -2.964195728302002, "logps/chosen": -365.9484558105469, "logps/rejected": -265.7365417480469, "loss": 0.4196, "rewards/accuracies": 0.75, "rewards/chosen": 0.11767662316560745, "rewards/margins": 1.4449424743652344, "rewards/rejected": -1.327265977859497, "step": 2284 }, { "epoch": 0.26, "learning_rate": 2.2435912442935738e-07, "logits/chosen": -2.6983301639556885, "logits/rejected": -2.59895396232605, "logps/chosen": -239.6616668701172, "logps/rejected": -323.2125549316406, "loss": 0.6148, "rewards/accuracies": 0.75, "rewards/chosen": -0.42109742760658264, "rewards/margins": 0.9226599931716919, "rewards/rejected": -1.3437573909759521, "step": 2285 }, { "epoch": 0.26, "learning_rate": 2.243240079597331e-07, "logits/chosen": -3.0785109996795654, "logits/rejected": -2.861117124557495, "logps/chosen": -255.2427520751953, "logps/rejected": -225.98379516601562, "loss": 0.4827, "rewards/accuracies": 0.875, "rewards/chosen": -0.2939375638961792, "rewards/margins": 1.5898536443710327, "rewards/rejected": -1.883791208267212, "step": 2286 }, { "epoch": 0.26, "learning_rate": 2.2428889149010886e-07, "logits/chosen": -3.454373359680176, "logits/rejected": -3.180373430252075, "logps/chosen": -486.55059814453125, "logps/rejected": -258.9229736328125, "loss": 0.2199, "rewards/accuracies": 1.0, "rewards/chosen": 0.14558573067188263, "rewards/margins": 2.226757287979126, "rewards/rejected": -2.081171751022339, "step": 2287 }, { "epoch": 0.26, "learning_rate": 2.242537750204846e-07, "logits/chosen": -2.8930914402008057, "logits/rejected": -3.1478424072265625, "logps/chosen": -243.96063232421875, "logps/rejected": -336.82122802734375, "loss": 0.2154, "rewards/accuracies": 1.0, "rewards/chosen": 0.19600819051265717, "rewards/margins": 2.4578299522399902, "rewards/rejected": -2.261821746826172, "step": 2288 }, { "epoch": 0.26, "learning_rate": 2.2421865855086034e-07, "logits/chosen": -3.2064030170440674, "logits/rejected": -3.1297738552093506, "logps/chosen": -206.52078247070312, "logps/rejected": -337.63446044921875, "loss": 0.2045, "rewards/accuracies": 0.875, "rewards/chosen": 0.3067767322063446, "rewards/margins": 2.6514620780944824, "rewards/rejected": -2.3446850776672363, "step": 2289 }, { "epoch": 0.26, "learning_rate": 2.241835420812361e-07, "logits/chosen": -3.1266894340515137, "logits/rejected": -3.0699071884155273, "logps/chosen": -281.8687744140625, "logps/rejected": -320.9389343261719, "loss": 0.4521, "rewards/accuracies": 0.75, "rewards/chosen": -0.01643332466483116, "rewards/margins": 0.9680436849594116, "rewards/rejected": -0.9844770431518555, "step": 2290 }, { "epoch": 0.26, "learning_rate": 2.2414842561161182e-07, "logits/chosen": -2.4313154220581055, "logits/rejected": -2.5549235343933105, "logps/chosen": -265.5350036621094, "logps/rejected": -290.1593322753906, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -0.18767738342285156, "rewards/margins": 0.8938902616500854, "rewards/rejected": -1.081567645072937, "step": 2291 }, { "epoch": 0.26, "learning_rate": 2.2411330914198758e-07, "logits/chosen": -3.4065773487091064, "logits/rejected": -3.153229236602783, "logps/chosen": -350.052001953125, "logps/rejected": -348.8642883300781, "loss": 0.5971, "rewards/accuracies": 0.75, "rewards/chosen": -0.45484742522239685, "rewards/margins": 0.8081840872764587, "rewards/rejected": -1.2630316019058228, "step": 2292 }, { "epoch": 0.26, "learning_rate": 2.2407819267236333e-07, "logits/chosen": -3.0807082653045654, "logits/rejected": -3.3376882076263428, "logps/chosen": -333.7201843261719, "logps/rejected": -363.69647216796875, "loss": 0.4653, "rewards/accuracies": 0.625, "rewards/chosen": 0.3106958568096161, "rewards/margins": 1.1861242055892944, "rewards/rejected": -0.8754282593727112, "step": 2293 }, { "epoch": 0.26, "learning_rate": 2.2404307620273906e-07, "logits/chosen": -3.154092311859131, "logits/rejected": -3.6609325408935547, "logps/chosen": -148.61679077148438, "logps/rejected": -237.959716796875, "loss": 0.1795, "rewards/accuracies": 0.875, "rewards/chosen": -0.24423475563526154, "rewards/margins": 2.550305128097534, "rewards/rejected": -2.794539451599121, "step": 2294 }, { "epoch": 0.26, "learning_rate": 2.2400795973311484e-07, "logits/chosen": -3.5372209548950195, "logits/rejected": -3.53617000579834, "logps/chosen": -328.703857421875, "logps/rejected": -260.7945861816406, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": -0.13486038148403168, "rewards/margins": 2.131824254989624, "rewards/rejected": -2.2666842937469482, "step": 2295 }, { "epoch": 0.26, "learning_rate": 2.2397284326349054e-07, "logits/chosen": -3.1791696548461914, "logits/rejected": -3.312778949737549, "logps/chosen": -320.55169677734375, "logps/rejected": -327.502197265625, "loss": 0.3846, "rewards/accuracies": 1.0, "rewards/chosen": 0.15459033846855164, "rewards/margins": 1.1614190340042114, "rewards/rejected": -1.0068286657333374, "step": 2296 }, { "epoch": 0.26, "learning_rate": 2.2393772679386632e-07, "logits/chosen": -3.3655989170074463, "logits/rejected": -3.001248836517334, "logps/chosen": -287.95892333984375, "logps/rejected": -138.47171020507812, "loss": 0.4552, "rewards/accuracies": 0.75, "rewards/chosen": -0.433413565158844, "rewards/margins": 0.844413161277771, "rewards/rejected": -1.2778267860412598, "step": 2297 }, { "epoch": 0.26, "learning_rate": 2.2390261032424207e-07, "logits/chosen": -3.427274703979492, "logits/rejected": -2.9941210746765137, "logps/chosen": -381.20654296875, "logps/rejected": -271.62371826171875, "loss": 0.3796, "rewards/accuracies": 0.75, "rewards/chosen": 0.5053874850273132, "rewards/margins": 2.2276675701141357, "rewards/rejected": -1.7222801446914673, "step": 2298 }, { "epoch": 0.27, "learning_rate": 2.238674938546178e-07, "logits/chosen": -3.1821961402893066, "logits/rejected": -3.077338933944702, "logps/chosen": -140.84918212890625, "logps/rejected": -210.26116943359375, "loss": 0.5102, "rewards/accuracies": 0.75, "rewards/chosen": -0.10953521728515625, "rewards/margins": 1.084930181503296, "rewards/rejected": -1.1944653987884521, "step": 2299 }, { "epoch": 0.27, "learning_rate": 2.2383237738499355e-07, "logits/chosen": -2.7200963497161865, "logits/rejected": -2.6300225257873535, "logps/chosen": -147.16705322265625, "logps/rejected": -205.0081329345703, "loss": 0.3961, "rewards/accuracies": 1.0, "rewards/chosen": -0.1571280062198639, "rewards/margins": 0.8892449140548706, "rewards/rejected": -1.0463730096817017, "step": 2300 }, { "epoch": 0.27, "learning_rate": 2.237972609153693e-07, "logits/chosen": -3.1947226524353027, "logits/rejected": -2.8795852661132812, "logps/chosen": -274.0819091796875, "logps/rejected": -298.4087219238281, "loss": 0.3128, "rewards/accuracies": 0.875, "rewards/chosen": -0.35198846459388733, "rewards/margins": 1.8519809246063232, "rewards/rejected": -2.2039694786071777, "step": 2301 }, { "epoch": 0.27, "learning_rate": 2.2376214444574504e-07, "logits/chosen": -3.267542839050293, "logits/rejected": -3.1906023025512695, "logps/chosen": -134.63031005859375, "logps/rejected": -136.79678344726562, "loss": 0.9008, "rewards/accuracies": 0.5, "rewards/chosen": -0.3767961859703064, "rewards/margins": 0.3492196500301361, "rewards/rejected": -0.7260158061981201, "step": 2302 }, { "epoch": 0.27, "learning_rate": 2.237270279761208e-07, "logits/chosen": -3.2612714767456055, "logits/rejected": -3.221637725830078, "logps/chosen": -149.6515350341797, "logps/rejected": -191.91078186035156, "loss": 0.4007, "rewards/accuracies": 0.875, "rewards/chosen": -0.21605056524276733, "rewards/margins": 1.591149091720581, "rewards/rejected": -1.8071998357772827, "step": 2303 }, { "epoch": 0.27, "learning_rate": 2.2369191150649652e-07, "logits/chosen": -3.7730138301849365, "logits/rejected": -3.7268903255462646, "logps/chosen": -183.25645446777344, "logps/rejected": -159.1355438232422, "loss": 0.349, "rewards/accuracies": 0.875, "rewards/chosen": 0.09885180741548538, "rewards/margins": 1.5030276775360107, "rewards/rejected": -1.4041757583618164, "step": 2304 }, { "epoch": 0.27, "learning_rate": 2.2365679503687227e-07, "logits/chosen": -3.5077261924743652, "logits/rejected": -3.2736268043518066, "logps/chosen": -111.56582641601562, "logps/rejected": -127.36540222167969, "loss": 0.6173, "rewards/accuracies": 0.5, "rewards/chosen": -0.3931378126144409, "rewards/margins": 0.8509576916694641, "rewards/rejected": -1.2440954446792603, "step": 2305 }, { "epoch": 0.27, "learning_rate": 2.2362167856724805e-07, "logits/chosen": -2.835000514984131, "logits/rejected": -2.863379955291748, "logps/chosen": -96.53521728515625, "logps/rejected": -163.51773071289062, "loss": 0.3677, "rewards/accuracies": 0.75, "rewards/chosen": -0.040494468063116074, "rewards/margins": 1.4579505920410156, "rewards/rejected": -1.4984450340270996, "step": 2306 }, { "epoch": 0.27, "learning_rate": 2.2358656209762378e-07, "logits/chosen": -3.65985107421875, "logits/rejected": -3.4756650924682617, "logps/chosen": -284.971923828125, "logps/rejected": -213.6151123046875, "loss": 0.5499, "rewards/accuracies": 0.5, "rewards/chosen": 0.005902983248233795, "rewards/margins": 0.5541761517524719, "rewards/rejected": -0.5482732057571411, "step": 2307 }, { "epoch": 0.27, "learning_rate": 2.2355144562799953e-07, "logits/chosen": -3.4789464473724365, "logits/rejected": -3.4813599586486816, "logps/chosen": -225.6255340576172, "logps/rejected": -251.912353515625, "loss": 0.2841, "rewards/accuracies": 0.75, "rewards/chosen": 0.3957343101501465, "rewards/margins": 2.797872543334961, "rewards/rejected": -2.4021384716033936, "step": 2308 }, { "epoch": 0.27, "learning_rate": 2.2351632915837529e-07, "logits/chosen": -3.571413993835449, "logits/rejected": -3.6593780517578125, "logps/chosen": -227.0102996826172, "logps/rejected": -219.4932861328125, "loss": 0.3867, "rewards/accuracies": 0.875, "rewards/chosen": 0.17431065440177917, "rewards/margins": 0.9619319438934326, "rewards/rejected": -0.7876212000846863, "step": 2309 }, { "epoch": 0.27, "learning_rate": 2.23481212688751e-07, "logits/chosen": -3.300116777420044, "logits/rejected": -3.3857243061065674, "logps/chosen": -203.71774291992188, "logps/rejected": -134.4215087890625, "loss": 0.3669, "rewards/accuracies": 0.875, "rewards/chosen": 0.3072929382324219, "rewards/margins": 0.9881787300109863, "rewards/rejected": -0.6808857917785645, "step": 2310 }, { "epoch": 0.27, "learning_rate": 2.2344609621912677e-07, "logits/chosen": -3.2789535522460938, "logits/rejected": -3.0446102619171143, "logps/chosen": -560.724853515625, "logps/rejected": -352.95025634765625, "loss": 0.4581, "rewards/accuracies": 0.875, "rewards/chosen": -0.07534122467041016, "rewards/margins": 1.242357611656189, "rewards/rejected": -1.3176988363265991, "step": 2311 }, { "epoch": 0.27, "learning_rate": 2.234109797495025e-07, "logits/chosen": -3.2544755935668945, "logits/rejected": -3.3811397552490234, "logps/chosen": -243.82464599609375, "logps/rejected": -356.3487854003906, "loss": 0.4196, "rewards/accuracies": 0.75, "rewards/chosen": 0.06084499508142471, "rewards/margins": 1.62021005153656, "rewards/rejected": -1.559365153312683, "step": 2312 }, { "epoch": 0.27, "learning_rate": 2.2337586327987825e-07, "logits/chosen": -3.1081643104553223, "logits/rejected": -3.223085403442383, "logps/chosen": -141.86044311523438, "logps/rejected": -173.1543426513672, "loss": 0.4082, "rewards/accuracies": 1.0, "rewards/chosen": -0.11057089269161224, "rewards/margins": 0.8069924116134644, "rewards/rejected": -0.9175633192062378, "step": 2313 }, { "epoch": 0.27, "learning_rate": 2.23340746810254e-07, "logits/chosen": -3.9556684494018555, "logits/rejected": -3.7129368782043457, "logps/chosen": -638.3978271484375, "logps/rejected": -383.7227783203125, "loss": 0.2822, "rewards/accuracies": 0.875, "rewards/chosen": 0.24593685567378998, "rewards/margins": 2.0285086631774902, "rewards/rejected": -1.7825716733932495, "step": 2314 }, { "epoch": 0.27, "learning_rate": 2.2330563034062973e-07, "logits/chosen": -3.3882644176483154, "logits/rejected": -3.101759910583496, "logps/chosen": -245.13487243652344, "logps/rejected": -349.3167419433594, "loss": 0.2494, "rewards/accuracies": 0.875, "rewards/chosen": 0.27796271443367004, "rewards/margins": 1.9709676504135132, "rewards/rejected": -1.693004846572876, "step": 2315 }, { "epoch": 0.27, "learning_rate": 2.2327051387100548e-07, "logits/chosen": -3.3345963954925537, "logits/rejected": -3.2201483249664307, "logps/chosen": -171.52688598632812, "logps/rejected": -227.0782928466797, "loss": 0.4838, "rewards/accuracies": 0.625, "rewards/chosen": -0.30608487129211426, "rewards/margins": 1.734795093536377, "rewards/rejected": -2.040879964828491, "step": 2316 }, { "epoch": 0.27, "learning_rate": 2.2323539740138126e-07, "logits/chosen": -2.705482006072998, "logits/rejected": -2.21264910697937, "logps/chosen": -445.8128967285156, "logps/rejected": -307.3106994628906, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": 0.6456524729728699, "rewards/margins": 2.289884567260742, "rewards/rejected": -1.6442320346832275, "step": 2317 }, { "epoch": 0.27, "learning_rate": 2.23200280931757e-07, "logits/chosen": -2.2589786052703857, "logits/rejected": -2.402052879333496, "logps/chosen": -430.25799560546875, "logps/rejected": -407.96087646484375, "loss": 0.4989, "rewards/accuracies": 0.625, "rewards/chosen": -0.14651212096214294, "rewards/margins": 0.8603757619857788, "rewards/rejected": -1.0068879127502441, "step": 2318 }, { "epoch": 0.27, "learning_rate": 2.2316516446213274e-07, "logits/chosen": -2.9790573120117188, "logits/rejected": -3.3048415184020996, "logps/chosen": -121.721923828125, "logps/rejected": -189.17747497558594, "loss": 0.6564, "rewards/accuracies": 0.75, "rewards/chosen": -0.3535494804382324, "rewards/margins": 0.9624842405319214, "rewards/rejected": -1.3160337209701538, "step": 2319 }, { "epoch": 0.27, "learning_rate": 2.2313004799250847e-07, "logits/chosen": -2.879859209060669, "logits/rejected": -2.9390296936035156, "logps/chosen": -196.1049041748047, "logps/rejected": -228.0198211669922, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -0.11319644749164581, "rewards/margins": 2.323320150375366, "rewards/rejected": -2.436516761779785, "step": 2320 }, { "epoch": 0.27, "learning_rate": 2.2309493152288423e-07, "logits/chosen": -2.8447914123535156, "logits/rejected": -3.143139362335205, "logps/chosen": -182.441162109375, "logps/rejected": -164.0215606689453, "loss": 0.3091, "rewards/accuracies": 0.75, "rewards/chosen": -0.23421475291252136, "rewards/margins": 2.2441415786743164, "rewards/rejected": -2.47835636138916, "step": 2321 }, { "epoch": 0.27, "learning_rate": 2.2305981505325998e-07, "logits/chosen": -3.70208740234375, "logits/rejected": -3.7647862434387207, "logps/chosen": -141.14073181152344, "logps/rejected": -197.44236755371094, "loss": 0.4809, "rewards/accuracies": 0.625, "rewards/chosen": -0.42657071352005005, "rewards/margins": 1.5194764137268066, "rewards/rejected": -1.9460471868515015, "step": 2322 }, { "epoch": 0.27, "learning_rate": 2.230246985836357e-07, "logits/chosen": -3.2442376613616943, "logits/rejected": -3.343928098678589, "logps/chosen": -117.53077697753906, "logps/rejected": -150.13473510742188, "loss": 0.455, "rewards/accuracies": 0.875, "rewards/chosen": -0.23510989546775818, "rewards/margins": 1.0165972709655762, "rewards/rejected": -1.2517071962356567, "step": 2323 }, { "epoch": 0.27, "learning_rate": 2.2298958211401146e-07, "logits/chosen": -3.367222309112549, "logits/rejected": -3.1284360885620117, "logps/chosen": -178.44094848632812, "logps/rejected": -163.55770874023438, "loss": 0.4787, "rewards/accuracies": 0.875, "rewards/chosen": -0.3506614863872528, "rewards/margins": 1.3835852146148682, "rewards/rejected": -1.7342467308044434, "step": 2324 }, { "epoch": 0.27, "learning_rate": 2.2295446564438722e-07, "logits/chosen": -3.0737433433532715, "logits/rejected": -3.311189889907837, "logps/chosen": -283.84478759765625, "logps/rejected": -379.0599365234375, "loss": 0.2715, "rewards/accuracies": 0.875, "rewards/chosen": 0.3319266438484192, "rewards/margins": 2.4585964679718018, "rewards/rejected": -2.1266698837280273, "step": 2325 }, { "epoch": 0.27, "learning_rate": 2.2291934917476294e-07, "logits/chosen": -3.051520586013794, "logits/rejected": -3.007539749145508, "logps/chosen": -329.06658935546875, "logps/rejected": -350.6230163574219, "loss": 0.175, "rewards/accuracies": 0.875, "rewards/chosen": 0.23875963687896729, "rewards/margins": 2.203643798828125, "rewards/rejected": -1.9648841619491577, "step": 2326 }, { "epoch": 0.27, "learning_rate": 2.228842327051387e-07, "logits/chosen": -3.1601006984710693, "logits/rejected": -3.1559200286865234, "logps/chosen": -338.2620544433594, "logps/rejected": -356.1646728515625, "loss": 0.3907, "rewards/accuracies": 0.875, "rewards/chosen": -0.0801306888461113, "rewards/margins": 1.1949632167816162, "rewards/rejected": -1.275093913078308, "step": 2327 }, { "epoch": 0.27, "learning_rate": 2.2284911623551442e-07, "logits/chosen": -2.860367774963379, "logits/rejected": -3.049008846282959, "logps/chosen": -300.8227233886719, "logps/rejected": -230.3771209716797, "loss": 0.5203, "rewards/accuracies": 0.75, "rewards/chosen": 0.009084880352020264, "rewards/margins": 1.1613528728485107, "rewards/rejected": -1.1522679328918457, "step": 2328 }, { "epoch": 0.27, "learning_rate": 2.228139997658902e-07, "logits/chosen": -3.1793651580810547, "logits/rejected": -2.9272918701171875, "logps/chosen": -147.4453887939453, "logps/rejected": -223.3256378173828, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": 0.17410321533679962, "rewards/margins": 1.507697582244873, "rewards/rejected": -1.3335944414138794, "step": 2329 }, { "epoch": 0.27, "learning_rate": 2.2277888329626596e-07, "logits/chosen": -2.694965124130249, "logits/rejected": -2.8185579776763916, "logps/chosen": -228.50149536132812, "logps/rejected": -227.23251342773438, "loss": 0.958, "rewards/accuracies": 0.375, "rewards/chosen": -0.2796976864337921, "rewards/margins": 0.5063636898994446, "rewards/rejected": -0.7860614061355591, "step": 2330 }, { "epoch": 0.27, "learning_rate": 2.2274376682664169e-07, "logits/chosen": -3.498776912689209, "logits/rejected": -3.654204845428467, "logps/chosen": -312.8533935546875, "logps/rejected": -277.02435302734375, "loss": 0.284, "rewards/accuracies": 0.75, "rewards/chosen": 0.10883574187755585, "rewards/margins": 2.124263048171997, "rewards/rejected": -2.015427350997925, "step": 2331 }, { "epoch": 0.27, "learning_rate": 2.2270865035701744e-07, "logits/chosen": -3.4159011840820312, "logits/rejected": -3.096810817718506, "logps/chosen": -205.45248413085938, "logps/rejected": -277.9186096191406, "loss": 0.3513, "rewards/accuracies": 0.875, "rewards/chosen": -0.47306978702545166, "rewards/margins": 1.7605385780334473, "rewards/rejected": -2.2336084842681885, "step": 2332 }, { "epoch": 0.27, "learning_rate": 2.2267353388739317e-07, "logits/chosen": -3.2760019302368164, "logits/rejected": -3.4436821937561035, "logps/chosen": -341.2275695800781, "logps/rejected": -391.20831298828125, "loss": 0.5589, "rewards/accuracies": 0.75, "rewards/chosen": -0.48584336042404175, "rewards/margins": 0.903161346912384, "rewards/rejected": -1.3890047073364258, "step": 2333 }, { "epoch": 0.27, "learning_rate": 2.2263841741776892e-07, "logits/chosen": -3.21671986579895, "logits/rejected": -3.2600603103637695, "logps/chosen": -253.56224060058594, "logps/rejected": -151.76132202148438, "loss": 0.4079, "rewards/accuracies": 0.75, "rewards/chosen": 0.2966836988925934, "rewards/margins": 1.0916377305984497, "rewards/rejected": -0.7949540615081787, "step": 2334 }, { "epoch": 0.27, "learning_rate": 2.2260330094814467e-07, "logits/chosen": -2.8790950775146484, "logits/rejected": -2.8491828441619873, "logps/chosen": -174.6387939453125, "logps/rejected": -179.0290985107422, "loss": 0.7913, "rewards/accuracies": 0.5, "rewards/chosen": -0.7612828612327576, "rewards/margins": 1.011436104774475, "rewards/rejected": -1.7727190256118774, "step": 2335 }, { "epoch": 0.27, "learning_rate": 2.225681844785204e-07, "logits/chosen": -4.086311340332031, "logits/rejected": -3.9808340072631836, "logps/chosen": -235.6486358642578, "logps/rejected": -209.19998168945312, "loss": 0.2772, "rewards/accuracies": 0.875, "rewards/chosen": 0.196656733751297, "rewards/margins": 1.6769564151763916, "rewards/rejected": -1.4802995920181274, "step": 2336 }, { "epoch": 0.27, "learning_rate": 2.2253306800889616e-07, "logits/chosen": -3.225292444229126, "logits/rejected": -3.1381125450134277, "logps/chosen": -326.66363525390625, "logps/rejected": -308.2825622558594, "loss": 0.3076, "rewards/accuracies": 0.875, "rewards/chosen": 0.24390682578086853, "rewards/margins": 2.5328760147094727, "rewards/rejected": -2.288969039916992, "step": 2337 }, { "epoch": 0.27, "learning_rate": 2.224979515392719e-07, "logits/chosen": -3.8074159622192383, "logits/rejected": -3.481955051422119, "logps/chosen": -289.1299743652344, "logps/rejected": -218.94273376464844, "loss": 0.4258, "rewards/accuracies": 0.875, "rewards/chosen": 0.044199563562870026, "rewards/margins": 1.2237370014190674, "rewards/rejected": -1.1795374155044556, "step": 2338 }, { "epoch": 0.27, "learning_rate": 2.2246283506964764e-07, "logits/chosen": -3.373215913772583, "logits/rejected": -3.6372809410095215, "logps/chosen": -182.498291015625, "logps/rejected": -232.4148712158203, "loss": 0.4181, "rewards/accuracies": 0.875, "rewards/chosen": -0.36814001202583313, "rewards/margins": 0.9056377410888672, "rewards/rejected": -1.273777723312378, "step": 2339 }, { "epoch": 0.27, "learning_rate": 2.2242771860002342e-07, "logits/chosen": -3.0227952003479004, "logits/rejected": -2.999978542327881, "logps/chosen": -282.85040283203125, "logps/rejected": -234.45497131347656, "loss": 0.4303, "rewards/accuracies": 0.875, "rewards/chosen": -0.3275481164455414, "rewards/margins": 1.1583924293518066, "rewards/rejected": -1.4859405755996704, "step": 2340 }, { "epoch": 0.27, "learning_rate": 2.2239260213039914e-07, "logits/chosen": -3.8096840381622314, "logits/rejected": -3.490859270095825, "logps/chosen": -263.4416198730469, "logps/rejected": -146.11439514160156, "loss": 0.6514, "rewards/accuracies": 0.625, "rewards/chosen": -0.09396440535783768, "rewards/margins": 1.118654489517212, "rewards/rejected": -1.2126188278198242, "step": 2341 }, { "epoch": 0.27, "learning_rate": 2.223574856607749e-07, "logits/chosen": -3.435049533843994, "logits/rejected": -3.6003971099853516, "logps/chosen": -333.01055908203125, "logps/rejected": -308.71649169921875, "loss": 0.4915, "rewards/accuracies": 0.75, "rewards/chosen": 0.04661216586828232, "rewards/margins": 1.5785157680511475, "rewards/rejected": -1.5319035053253174, "step": 2342 }, { "epoch": 0.27, "learning_rate": 2.2232236919115065e-07, "logits/chosen": -2.554391622543335, "logits/rejected": -2.4934206008911133, "logps/chosen": -445.8623962402344, "logps/rejected": -279.84063720703125, "loss": 0.362, "rewards/accuracies": 0.75, "rewards/chosen": 0.3051025867462158, "rewards/margins": 2.2693121433258057, "rewards/rejected": -1.9642094373703003, "step": 2343 }, { "epoch": 0.27, "learning_rate": 2.2228725272152638e-07, "logits/chosen": -2.769512414932251, "logits/rejected": -2.9518849849700928, "logps/chosen": -325.8170166015625, "logps/rejected": -283.3702392578125, "loss": 0.3585, "rewards/accuracies": 0.75, "rewards/chosen": 0.19465875625610352, "rewards/margins": 1.7896649837493896, "rewards/rejected": -1.5950063467025757, "step": 2344 }, { "epoch": 0.27, "learning_rate": 2.2225213625190213e-07, "logits/chosen": -3.531412124633789, "logits/rejected": -3.475614309310913, "logps/chosen": -179.06591796875, "logps/rejected": -204.80856323242188, "loss": 0.6645, "rewards/accuracies": 0.5, "rewards/chosen": -0.20357200503349304, "rewards/margins": 0.48782479763031006, "rewards/rejected": -0.6913967132568359, "step": 2345 }, { "epoch": 0.27, "learning_rate": 2.222170197822779e-07, "logits/chosen": -1.813718557357788, "logits/rejected": -2.064091205596924, "logps/chosen": -412.2948913574219, "logps/rejected": -246.18441772460938, "loss": 0.154, "rewards/accuracies": 1.0, "rewards/chosen": 0.2545378804206848, "rewards/margins": 2.5239691734313965, "rewards/rejected": -2.2694313526153564, "step": 2346 }, { "epoch": 0.27, "learning_rate": 2.2218190331265361e-07, "logits/chosen": -2.628925323486328, "logits/rejected": -2.913165330886841, "logps/chosen": -106.1702651977539, "logps/rejected": -156.7030487060547, "loss": 0.3892, "rewards/accuracies": 0.75, "rewards/chosen": 0.23329822719097137, "rewards/margins": 2.0119333267211914, "rewards/rejected": -1.7786349058151245, "step": 2347 }, { "epoch": 0.27, "learning_rate": 2.2214678684302937e-07, "logits/chosen": -3.372081756591797, "logits/rejected": -3.3796191215515137, "logps/chosen": -113.38690948486328, "logps/rejected": -193.27114868164062, "loss": 0.1448, "rewards/accuracies": 1.0, "rewards/chosen": 0.6297838687896729, "rewards/margins": 2.606245517730713, "rewards/rejected": -1.97646164894104, "step": 2348 }, { "epoch": 0.27, "learning_rate": 2.221116703734051e-07, "logits/chosen": -3.517207145690918, "logits/rejected": -3.4619932174682617, "logps/chosen": -353.9598083496094, "logps/rejected": -261.6011047363281, "loss": 0.2014, "rewards/accuracies": 1.0, "rewards/chosen": 0.3443419337272644, "rewards/margins": 1.7278470993041992, "rewards/rejected": -1.3835052251815796, "step": 2349 }, { "epoch": 0.27, "learning_rate": 2.2207655390378085e-07, "logits/chosen": -3.1491665840148926, "logits/rejected": -3.0899605751037598, "logps/chosen": -120.68238830566406, "logps/rejected": -180.17630004882812, "loss": 0.2524, "rewards/accuracies": 0.875, "rewards/chosen": -0.046226829290390015, "rewards/margins": 2.2348098754882812, "rewards/rejected": -2.281036853790283, "step": 2350 }, { "epoch": 0.27, "learning_rate": 2.2204143743415663e-07, "logits/chosen": -3.08530330657959, "logits/rejected": -2.5792651176452637, "logps/chosen": -297.60345458984375, "logps/rejected": -146.40257263183594, "loss": 0.8012, "rewards/accuracies": 0.625, "rewards/chosen": -0.4886108636856079, "rewards/margins": 0.6560665965080261, "rewards/rejected": -1.1446774005889893, "step": 2351 }, { "epoch": 0.27, "learning_rate": 2.2200632096453236e-07, "logits/chosen": -3.4146993160247803, "logits/rejected": -3.084359645843506, "logps/chosen": -152.92005920410156, "logps/rejected": -150.97763061523438, "loss": 0.3178, "rewards/accuracies": 0.875, "rewards/chosen": 0.3558596670627594, "rewards/margins": 1.545936942100525, "rewards/rejected": -1.1900771856307983, "step": 2352 }, { "epoch": 0.27, "learning_rate": 2.219712044949081e-07, "logits/chosen": -3.4668455123901367, "logits/rejected": -3.2362961769104004, "logps/chosen": -244.27468872070312, "logps/rejected": -202.76588439941406, "loss": 0.4321, "rewards/accuracies": 0.875, "rewards/chosen": -0.48818355798721313, "rewards/margins": 1.0799890756607056, "rewards/rejected": -1.5681726932525635, "step": 2353 }, { "epoch": 0.27, "learning_rate": 2.2193608802528387e-07, "logits/chosen": -3.551710605621338, "logits/rejected": -3.1826279163360596, "logps/chosen": -377.43194580078125, "logps/rejected": -189.50640869140625, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -0.21525511145591736, "rewards/margins": 1.5323140621185303, "rewards/rejected": -1.7475690841674805, "step": 2354 }, { "epoch": 0.27, "learning_rate": 2.219009715556596e-07, "logits/chosen": -2.9195122718811035, "logits/rejected": -2.8835558891296387, "logps/chosen": -159.45071411132812, "logps/rejected": -159.88906860351562, "loss": 0.4978, "rewards/accuracies": 0.75, "rewards/chosen": -0.4135933816432953, "rewards/margins": 0.9657815098762512, "rewards/rejected": -1.3793747425079346, "step": 2355 }, { "epoch": 0.27, "learning_rate": 2.2186585508603535e-07, "logits/chosen": -2.9549036026000977, "logits/rejected": -2.754807472229004, "logps/chosen": -275.7275085449219, "logps/rejected": -274.50927734375, "loss": 0.488, "rewards/accuracies": 0.75, "rewards/chosen": -0.26981228590011597, "rewards/margins": 1.8283768892288208, "rewards/rejected": -2.098189115524292, "step": 2356 }, { "epoch": 0.27, "learning_rate": 2.2183073861641107e-07, "logits/chosen": -3.308736801147461, "logits/rejected": -3.3536837100982666, "logps/chosen": -318.6805725097656, "logps/rejected": -286.7496032714844, "loss": 0.3379, "rewards/accuracies": 0.75, "rewards/chosen": 0.6624822616577148, "rewards/margins": 2.1455230712890625, "rewards/rejected": -1.4830409288406372, "step": 2357 }, { "epoch": 0.27, "learning_rate": 2.2179562214678683e-07, "logits/chosen": -2.864276170730591, "logits/rejected": -3.079488754272461, "logps/chosen": -342.0240783691406, "logps/rejected": -248.77630615234375, "loss": 0.3909, "rewards/accuracies": 0.75, "rewards/chosen": 0.21693213284015656, "rewards/margins": 2.3979406356811523, "rewards/rejected": -2.1810085773468018, "step": 2358 }, { "epoch": 0.27, "learning_rate": 2.2176050567716258e-07, "logits/chosen": -3.6980276107788086, "logits/rejected": -3.1580188274383545, "logps/chosen": -371.4825439453125, "logps/rejected": -315.49456787109375, "loss": 0.373, "rewards/accuracies": 0.875, "rewards/chosen": 0.5246973037719727, "rewards/margins": 1.3481178283691406, "rewards/rejected": -0.823420524597168, "step": 2359 }, { "epoch": 0.27, "learning_rate": 2.217253892075383e-07, "logits/chosen": -3.343250036239624, "logits/rejected": -2.905071258544922, "logps/chosen": -308.9192199707031, "logps/rejected": -257.91400146484375, "loss": 0.3459, "rewards/accuracies": 0.75, "rewards/chosen": -0.1643187701702118, "rewards/margins": 2.3319225311279297, "rewards/rejected": -2.496241569519043, "step": 2360 }, { "epoch": 0.27, "learning_rate": 2.2169027273791406e-07, "logits/chosen": -3.4855165481567383, "logits/rejected": -3.3890535831451416, "logps/chosen": -160.6721954345703, "logps/rejected": -175.79075622558594, "loss": 0.5457, "rewards/accuracies": 0.875, "rewards/chosen": -0.021240979433059692, "rewards/margins": 0.49967584013938904, "rewards/rejected": -0.5209168195724487, "step": 2361 }, { "epoch": 0.27, "learning_rate": 2.2165515626828984e-07, "logits/chosen": -3.5524086952209473, "logits/rejected": -3.327573776245117, "logps/chosen": -294.0095520019531, "logps/rejected": -249.95724487304688, "loss": 0.7239, "rewards/accuracies": 0.625, "rewards/chosen": -0.5186745524406433, "rewards/margins": 0.7030817270278931, "rewards/rejected": -1.2217562198638916, "step": 2362 }, { "epoch": 0.27, "learning_rate": 2.2162003979866557e-07, "logits/chosen": -2.519744634628296, "logits/rejected": -2.6741037368774414, "logps/chosen": -447.0435485839844, "logps/rejected": -438.4686279296875, "loss": 0.2051, "rewards/accuracies": 0.875, "rewards/chosen": 0.4449297785758972, "rewards/margins": 2.3131048679351807, "rewards/rejected": -1.8681750297546387, "step": 2363 }, { "epoch": 0.27, "learning_rate": 2.2158492332904132e-07, "logits/chosen": -2.644977569580078, "logits/rejected": -3.027986526489258, "logps/chosen": -297.0125732421875, "logps/rejected": -204.385009765625, "loss": 0.3048, "rewards/accuracies": 0.875, "rewards/chosen": 0.03932652249932289, "rewards/margins": 2.2451958656311035, "rewards/rejected": -2.205869436264038, "step": 2364 }, { "epoch": 0.27, "learning_rate": 2.2154980685941705e-07, "logits/chosen": -3.01935076713562, "logits/rejected": -2.8828582763671875, "logps/chosen": -208.87442016601562, "logps/rejected": -217.8555908203125, "loss": 0.4533, "rewards/accuracies": 0.875, "rewards/chosen": -0.011846423149108887, "rewards/margins": 0.9866576194763184, "rewards/rejected": -0.9985040426254272, "step": 2365 }, { "epoch": 0.27, "learning_rate": 2.215146903897928e-07, "logits/chosen": -2.729572057723999, "logits/rejected": -2.713742733001709, "logps/chosen": -271.5955810546875, "logps/rejected": -316.9332275390625, "loss": 0.1964, "rewards/accuracies": 1.0, "rewards/chosen": -0.09065475314855576, "rewards/margins": 2.017472743988037, "rewards/rejected": -2.1081275939941406, "step": 2366 }, { "epoch": 0.27, "learning_rate": 2.2147957392016856e-07, "logits/chosen": -3.062025785446167, "logits/rejected": -3.1488003730773926, "logps/chosen": -231.14218139648438, "logps/rejected": -216.8812713623047, "loss": 0.3381, "rewards/accuracies": 0.75, "rewards/chosen": -0.15053454041481018, "rewards/margins": 1.1994781494140625, "rewards/rejected": -1.3500127792358398, "step": 2367 }, { "epoch": 0.27, "learning_rate": 2.214444574505443e-07, "logits/chosen": -2.9477925300598145, "logits/rejected": -3.02471661567688, "logps/chosen": -263.9590148925781, "logps/rejected": -291.89361572265625, "loss": 0.532, "rewards/accuracies": 0.625, "rewards/chosen": -0.0771530345082283, "rewards/margins": 1.1787853240966797, "rewards/rejected": -1.2559382915496826, "step": 2368 }, { "epoch": 0.27, "learning_rate": 2.2140934098092004e-07, "logits/chosen": -2.950366973876953, "logits/rejected": -2.7080912590026855, "logps/chosen": -213.1247100830078, "logps/rejected": -177.0506591796875, "loss": 0.4165, "rewards/accuracies": 0.75, "rewards/chosen": -0.4065900444984436, "rewards/margins": 1.0092453956604004, "rewards/rejected": -1.4158353805541992, "step": 2369 }, { "epoch": 0.27, "learning_rate": 2.213742245112958e-07, "logits/chosen": -3.305753707885742, "logits/rejected": -3.1292917728424072, "logps/chosen": -219.12379455566406, "logps/rejected": -186.06771850585938, "loss": 0.8962, "rewards/accuracies": 0.75, "rewards/chosen": -0.8282966613769531, "rewards/margins": 0.2661586105823517, "rewards/rejected": -1.094455361366272, "step": 2370 }, { "epoch": 0.27, "learning_rate": 2.2133910804167152e-07, "logits/chosen": -2.7228991985321045, "logits/rejected": -2.8949356079101562, "logps/chosen": -160.4250946044922, "logps/rejected": -223.53753662109375, "loss": 0.2595, "rewards/accuracies": 0.875, "rewards/chosen": -0.30080682039260864, "rewards/margins": 2.7960963249206543, "rewards/rejected": -3.0969035625457764, "step": 2371 }, { "epoch": 0.27, "learning_rate": 2.2130399157204728e-07, "logits/chosen": -3.3176345825195312, "logits/rejected": -3.7617435455322266, "logps/chosen": -103.75447845458984, "logps/rejected": -183.79425048828125, "loss": 0.3138, "rewards/accuracies": 0.875, "rewards/chosen": -0.062077343463897705, "rewards/margins": 1.5383375883102417, "rewards/rejected": -1.6004149913787842, "step": 2372 }, { "epoch": 0.27, "learning_rate": 2.21268875102423e-07, "logits/chosen": -2.5565836429595947, "logits/rejected": -2.4830503463745117, "logps/chosen": -352.04693603515625, "logps/rejected": -152.4482421875, "loss": 0.5654, "rewards/accuracies": 0.625, "rewards/chosen": -0.5806581974029541, "rewards/margins": 0.867794930934906, "rewards/rejected": -1.4484530687332153, "step": 2373 }, { "epoch": 0.27, "learning_rate": 2.2123375863279878e-07, "logits/chosen": -3.0290164947509766, "logits/rejected": -3.0526957511901855, "logps/chosen": -149.31640625, "logps/rejected": -224.25064086914062, "loss": 0.3692, "rewards/accuracies": 0.75, "rewards/chosen": 0.19528524577617645, "rewards/margins": 2.2810399532318115, "rewards/rejected": -2.085754632949829, "step": 2374 }, { "epoch": 0.27, "learning_rate": 2.2119864216317454e-07, "logits/chosen": -3.318833827972412, "logits/rejected": -3.4359707832336426, "logps/chosen": -276.9692077636719, "logps/rejected": -350.209228515625, "loss": 0.3379, "rewards/accuracies": 0.875, "rewards/chosen": -0.12020000070333481, "rewards/margins": 2.2103943824768066, "rewards/rejected": -2.330594539642334, "step": 2375 }, { "epoch": 0.27, "learning_rate": 2.2116352569355026e-07, "logits/chosen": -3.231630802154541, "logits/rejected": -3.184823513031006, "logps/chosen": -203.3464813232422, "logps/rejected": -181.96673583984375, "loss": 0.3575, "rewards/accuracies": 0.875, "rewards/chosen": 0.20183923840522766, "rewards/margins": 1.325305461883545, "rewards/rejected": -1.1234662532806396, "step": 2376 }, { "epoch": 0.27, "learning_rate": 2.2112840922392602e-07, "logits/chosen": -3.107874631881714, "logits/rejected": -2.8020365238189697, "logps/chosen": -211.1685791015625, "logps/rejected": -183.49667358398438, "loss": 0.3015, "rewards/accuracies": 0.875, "rewards/chosen": 0.25903183221817017, "rewards/margins": 1.9042845964431763, "rewards/rejected": -1.6452527046203613, "step": 2377 }, { "epoch": 0.27, "learning_rate": 2.2109329275430175e-07, "logits/chosen": -3.025005578994751, "logits/rejected": -3.2265372276306152, "logps/chosen": -243.36477661132812, "logps/rejected": -337.70404052734375, "loss": 0.3672, "rewards/accuracies": 0.75, "rewards/chosen": -0.35006654262542725, "rewards/margins": 1.5083962678909302, "rewards/rejected": -1.8584628105163574, "step": 2378 }, { "epoch": 0.27, "learning_rate": 2.210581762846775e-07, "logits/chosen": -3.4176032543182373, "logits/rejected": -3.6933553218841553, "logps/chosen": -182.20391845703125, "logps/rejected": -228.9852294921875, "loss": 0.2641, "rewards/accuracies": 0.875, "rewards/chosen": 0.3721153438091278, "rewards/margins": 3.1165225505828857, "rewards/rejected": -2.7444071769714355, "step": 2379 }, { "epoch": 0.27, "learning_rate": 2.2102305981505325e-07, "logits/chosen": -3.6676979064941406, "logits/rejected": -4.063460350036621, "logps/chosen": -135.11375427246094, "logps/rejected": -207.39349365234375, "loss": 0.4093, "rewards/accuracies": 0.75, "rewards/chosen": -0.24123400449752808, "rewards/margins": 1.9569981098175049, "rewards/rejected": -2.198232412338257, "step": 2380 }, { "epoch": 0.27, "learning_rate": 2.2098794334542898e-07, "logits/chosen": -2.870166301727295, "logits/rejected": -2.632882833480835, "logps/chosen": -439.5860290527344, "logps/rejected": -281.8860778808594, "loss": 0.3691, "rewards/accuracies": 0.75, "rewards/chosen": -0.21394672989845276, "rewards/margins": 1.2313594818115234, "rewards/rejected": -1.4453063011169434, "step": 2381 }, { "epoch": 0.27, "learning_rate": 2.2095282687580473e-07, "logits/chosen": -3.380429983139038, "logits/rejected": -3.255276679992676, "logps/chosen": -236.44692993164062, "logps/rejected": -285.94061279296875, "loss": 0.5173, "rewards/accuracies": 0.75, "rewards/chosen": -0.18474343419075012, "rewards/margins": 1.9238542318344116, "rewards/rejected": -2.108597755432129, "step": 2382 }, { "epoch": 0.27, "learning_rate": 2.209177104061805e-07, "logits/chosen": -2.41460919380188, "logits/rejected": -2.5124406814575195, "logps/chosen": -235.37158203125, "logps/rejected": -299.4578552246094, "loss": 0.2426, "rewards/accuracies": 0.875, "rewards/chosen": 0.5221603512763977, "rewards/margins": 2.3304672241210938, "rewards/rejected": -1.80830717086792, "step": 2383 }, { "epoch": 0.27, "learning_rate": 2.2088259393655622e-07, "logits/chosen": -3.5568745136260986, "logits/rejected": -2.92708420753479, "logps/chosen": -370.26666259765625, "logps/rejected": -294.4802551269531, "loss": 0.3657, "rewards/accuracies": 0.75, "rewards/chosen": -0.5223014950752258, "rewards/margins": 1.697596788406372, "rewards/rejected": -2.2198984622955322, "step": 2384 }, { "epoch": 0.27, "learning_rate": 2.20847477466932e-07, "logits/chosen": -3.124657154083252, "logits/rejected": -3.283043622970581, "logps/chosen": -253.21127319335938, "logps/rejected": -238.40670776367188, "loss": 0.3067, "rewards/accuracies": 0.875, "rewards/chosen": 0.33296895027160645, "rewards/margins": 1.9213249683380127, "rewards/rejected": -1.5883560180664062, "step": 2385 }, { "epoch": 0.28, "learning_rate": 2.2081236099730772e-07, "logits/chosen": -2.3110172748565674, "logits/rejected": -2.652594566345215, "logps/chosen": -442.261474609375, "logps/rejected": -365.875732421875, "loss": 0.3976, "rewards/accuracies": 0.75, "rewards/chosen": 0.268903911113739, "rewards/margins": 1.5361521244049072, "rewards/rejected": -1.267248272895813, "step": 2386 }, { "epoch": 0.28, "learning_rate": 2.2077724452768348e-07, "logits/chosen": -2.6843395233154297, "logits/rejected": -2.880399703979492, "logps/chosen": -297.4222106933594, "logps/rejected": -306.49822998046875, "loss": 0.3316, "rewards/accuracies": 0.875, "rewards/chosen": -0.22305253148078918, "rewards/margins": 1.7236433029174805, "rewards/rejected": -1.9466958045959473, "step": 2387 }, { "epoch": 0.28, "learning_rate": 2.2074212805805923e-07, "logits/chosen": -3.077138662338257, "logits/rejected": -3.1536471843719482, "logps/chosen": -182.40321350097656, "logps/rejected": -185.0398712158203, "loss": 0.4377, "rewards/accuracies": 0.625, "rewards/chosen": 0.007775649428367615, "rewards/margins": 1.5823495388031006, "rewards/rejected": -1.5745739936828613, "step": 2388 }, { "epoch": 0.28, "learning_rate": 2.2070701158843496e-07, "logits/chosen": -3.8942527770996094, "logits/rejected": -3.7809829711914062, "logps/chosen": -246.1905975341797, "logps/rejected": -258.8467712402344, "loss": 0.392, "rewards/accuracies": 0.625, "rewards/chosen": 0.3766113817691803, "rewards/margins": 1.232938289642334, "rewards/rejected": -0.8563268780708313, "step": 2389 }, { "epoch": 0.28, "learning_rate": 2.206718951188107e-07, "logits/chosen": -3.1586809158325195, "logits/rejected": -3.657280921936035, "logps/chosen": -280.7421875, "logps/rejected": -308.3777160644531, "loss": 0.3288, "rewards/accuracies": 0.75, "rewards/chosen": 0.14101499319076538, "rewards/margins": 3.210261821746826, "rewards/rejected": -3.069246768951416, "step": 2390 }, { "epoch": 0.28, "learning_rate": 2.2063677864918647e-07, "logits/chosen": -3.016045093536377, "logits/rejected": -2.9702980518341064, "logps/chosen": -411.37939453125, "logps/rejected": -207.38729858398438, "loss": 0.4823, "rewards/accuracies": 0.75, "rewards/chosen": 0.0860232338309288, "rewards/margins": 1.1013425588607788, "rewards/rejected": -1.0153193473815918, "step": 2391 }, { "epoch": 0.28, "learning_rate": 2.206016621795622e-07, "logits/chosen": -3.190985679626465, "logits/rejected": -3.198812961578369, "logps/chosen": -195.42367553710938, "logps/rejected": -239.6539306640625, "loss": 0.4967, "rewards/accuracies": 0.625, "rewards/chosen": 0.005746133625507355, "rewards/margins": 0.8703901767730713, "rewards/rejected": -0.8646441102027893, "step": 2392 }, { "epoch": 0.28, "learning_rate": 2.2056654570993795e-07, "logits/chosen": -2.549551010131836, "logits/rejected": -2.4927704334259033, "logps/chosen": -261.33135986328125, "logps/rejected": -256.83306884765625, "loss": 0.2271, "rewards/accuracies": 1.0, "rewards/chosen": -0.21703052520751953, "rewards/margins": 2.591268539428711, "rewards/rejected": -2.8082990646362305, "step": 2393 }, { "epoch": 0.28, "learning_rate": 2.2053142924031368e-07, "logits/chosen": -3.2351911067962646, "logits/rejected": -3.5481748580932617, "logps/chosen": -113.99003601074219, "logps/rejected": -227.33538818359375, "loss": 0.3424, "rewards/accuracies": 0.875, "rewards/chosen": -0.2743552625179291, "rewards/margins": 1.4932503700256348, "rewards/rejected": -1.7676056623458862, "step": 2394 }, { "epoch": 0.28, "learning_rate": 2.2049631277068943e-07, "logits/chosen": -2.7589290142059326, "logits/rejected": -2.7966554164886475, "logps/chosen": -373.5641174316406, "logps/rejected": -280.7613525390625, "loss": 0.5189, "rewards/accuracies": 0.75, "rewards/chosen": -0.018179267644882202, "rewards/margins": 0.6036527156829834, "rewards/rejected": -0.621832013130188, "step": 2395 }, { "epoch": 0.28, "learning_rate": 2.204611963010652e-07, "logits/chosen": -2.3533010482788086, "logits/rejected": -2.350330352783203, "logps/chosen": -225.6588134765625, "logps/rejected": -245.79742431640625, "loss": 0.453, "rewards/accuracies": 0.625, "rewards/chosen": -0.05030819773674011, "rewards/margins": 0.9770106077194214, "rewards/rejected": -1.0273189544677734, "step": 2396 }, { "epoch": 0.28, "learning_rate": 2.2042607983144094e-07, "logits/chosen": -3.4988393783569336, "logits/rejected": -3.3664591312408447, "logps/chosen": -236.99954223632812, "logps/rejected": -218.96031188964844, "loss": 0.2357, "rewards/accuracies": 1.0, "rewards/chosen": 0.3076488673686981, "rewards/margins": 1.9368271827697754, "rewards/rejected": -1.629178524017334, "step": 2397 }, { "epoch": 0.28, "learning_rate": 2.203909633618167e-07, "logits/chosen": -2.6876895427703857, "logits/rejected": -2.8589649200439453, "logps/chosen": -144.1785888671875, "logps/rejected": -226.133056640625, "loss": 0.2208, "rewards/accuracies": 1.0, "rewards/chosen": 0.2778404951095581, "rewards/margins": 1.5523988008499146, "rewards/rejected": -1.2745583057403564, "step": 2398 }, { "epoch": 0.28, "learning_rate": 2.2035584689219244e-07, "logits/chosen": -2.7386555671691895, "logits/rejected": -2.68749737739563, "logps/chosen": -279.5812683105469, "logps/rejected": -289.7491455078125, "loss": 0.4608, "rewards/accuracies": 0.75, "rewards/chosen": -0.3533630073070526, "rewards/margins": 1.2324186563491821, "rewards/rejected": -1.5857816934585571, "step": 2399 }, { "epoch": 0.28, "learning_rate": 2.2032073042256817e-07, "logits/chosen": -3.0557334423065186, "logits/rejected": -2.8801448345184326, "logps/chosen": -388.2572326660156, "logps/rejected": -261.0730895996094, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.23178040981292725, "rewards/margins": 1.0250412225723267, "rewards/rejected": -1.2568215131759644, "step": 2400 }, { "epoch": 0.28, "learning_rate": 2.2028561395294393e-07, "logits/chosen": -3.4354753494262695, "logits/rejected": -3.4416000843048096, "logps/chosen": -197.03607177734375, "logps/rejected": -338.26092529296875, "loss": 0.371, "rewards/accuracies": 0.75, "rewards/chosen": -0.12878771126270294, "rewards/margins": 1.818946123123169, "rewards/rejected": -1.9477338790893555, "step": 2401 }, { "epoch": 0.28, "learning_rate": 2.2025049748331965e-07, "logits/chosen": -3.4868335723876953, "logits/rejected": -3.625142812728882, "logps/chosen": -225.0349884033203, "logps/rejected": -217.71945190429688, "loss": 0.1972, "rewards/accuracies": 1.0, "rewards/chosen": 0.4626690149307251, "rewards/margins": 1.8271989822387695, "rewards/rejected": -1.364530086517334, "step": 2402 }, { "epoch": 0.28, "learning_rate": 2.202153810136954e-07, "logits/chosen": -3.113661289215088, "logits/rejected": -3.020430088043213, "logps/chosen": -235.8622589111328, "logps/rejected": -304.64794921875, "loss": 0.4089, "rewards/accuracies": 0.875, "rewards/chosen": 0.05186605453491211, "rewards/margins": 1.4089009761810303, "rewards/rejected": -1.3570349216461182, "step": 2403 }, { "epoch": 0.28, "learning_rate": 2.2018026454407116e-07, "logits/chosen": -4.021939277648926, "logits/rejected": -3.979612350463867, "logps/chosen": -237.0137939453125, "logps/rejected": -249.8342742919922, "loss": 0.3147, "rewards/accuracies": 0.875, "rewards/chosen": -0.032747410237789154, "rewards/margins": 2.257519245147705, "rewards/rejected": -2.290266752243042, "step": 2404 }, { "epoch": 0.28, "learning_rate": 2.201451480744469e-07, "logits/chosen": -3.4714596271514893, "logits/rejected": -3.585513114929199, "logps/chosen": -185.29000854492188, "logps/rejected": -233.47500610351562, "loss": 0.5943, "rewards/accuracies": 0.625, "rewards/chosen": -0.0920921340584755, "rewards/margins": 1.0241678953170776, "rewards/rejected": -1.116260051727295, "step": 2405 }, { "epoch": 0.28, "learning_rate": 2.2011003160482264e-07, "logits/chosen": -3.750382900238037, "logits/rejected": -3.3204398155212402, "logps/chosen": -333.0164794921875, "logps/rejected": -330.2599792480469, "loss": 0.1647, "rewards/accuracies": 0.875, "rewards/chosen": 0.7107330560684204, "rewards/margins": 2.8035542964935303, "rewards/rejected": -2.0928211212158203, "step": 2406 }, { "epoch": 0.28, "learning_rate": 2.2007491513519842e-07, "logits/chosen": -3.603848457336426, "logits/rejected": -3.0772993564605713, "logps/chosen": -189.6065673828125, "logps/rejected": -232.31509399414062, "loss": 0.3877, "rewards/accuracies": 0.875, "rewards/chosen": -0.19944944977760315, "rewards/margins": 1.851662039756775, "rewards/rejected": -2.0511114597320557, "step": 2407 }, { "epoch": 0.28, "learning_rate": 2.2003979866557415e-07, "logits/chosen": -3.3868868350982666, "logits/rejected": -3.2851734161376953, "logps/chosen": -281.1761779785156, "logps/rejected": -270.9393005371094, "loss": 0.2951, "rewards/accuracies": 0.875, "rewards/chosen": -0.3579005300998688, "rewards/margins": 1.955430269241333, "rewards/rejected": -2.313331127166748, "step": 2408 }, { "epoch": 0.28, "learning_rate": 2.200046821959499e-07, "logits/chosen": -3.0388286113739014, "logits/rejected": -3.128875494003296, "logps/chosen": -120.84025573730469, "logps/rejected": -185.491455078125, "loss": 0.3228, "rewards/accuracies": 0.875, "rewards/chosen": 0.46276143193244934, "rewards/margins": 2.207402229309082, "rewards/rejected": -1.7446409463882446, "step": 2409 }, { "epoch": 0.28, "learning_rate": 2.1996956572632563e-07, "logits/chosen": -3.182076930999756, "logits/rejected": -3.0563042163848877, "logps/chosen": -236.756103515625, "logps/rejected": -262.5376892089844, "loss": 0.4881, "rewards/accuracies": 0.875, "rewards/chosen": -0.16176122426986694, "rewards/margins": 0.8078562021255493, "rewards/rejected": -0.969617486000061, "step": 2410 }, { "epoch": 0.28, "learning_rate": 2.1993444925670138e-07, "logits/chosen": -2.08149790763855, "logits/rejected": -2.1729912757873535, "logps/chosen": -407.6117858886719, "logps/rejected": -252.29693603515625, "loss": 0.4656, "rewards/accuracies": 0.625, "rewards/chosen": 0.3368873596191406, "rewards/margins": 0.8198865652084351, "rewards/rejected": -0.4829992651939392, "step": 2411 }, { "epoch": 0.28, "learning_rate": 2.1989933278707714e-07, "logits/chosen": -3.274141788482666, "logits/rejected": -3.042417526245117, "logps/chosen": -199.77655029296875, "logps/rejected": -204.34878540039062, "loss": 0.3464, "rewards/accuracies": 0.875, "rewards/chosen": -0.03936605527997017, "rewards/margins": 1.9879788160324097, "rewards/rejected": -2.0273449420928955, "step": 2412 }, { "epoch": 0.28, "learning_rate": 2.1986421631745287e-07, "logits/chosen": -3.353074073791504, "logits/rejected": -3.4450905323028564, "logps/chosen": -232.88137817382812, "logps/rejected": -373.7280578613281, "loss": 0.1574, "rewards/accuracies": 1.0, "rewards/chosen": 0.5070069432258606, "rewards/margins": 2.8949999809265137, "rewards/rejected": -2.387993097305298, "step": 2413 }, { "epoch": 0.28, "learning_rate": 2.1982909984782862e-07, "logits/chosen": -3.47517728805542, "logits/rejected": -3.363884925842285, "logps/chosen": -145.9418182373047, "logps/rejected": -180.2159881591797, "loss": 0.3109, "rewards/accuracies": 0.75, "rewards/chosen": 0.09704026579856873, "rewards/margins": 2.841865062713623, "rewards/rejected": -2.7448248863220215, "step": 2414 }, { "epoch": 0.28, "learning_rate": 2.1979398337820437e-07, "logits/chosen": -3.598708391189575, "logits/rejected": -3.782074213027954, "logps/chosen": -135.23104858398438, "logps/rejected": -296.7024841308594, "loss": 0.2586, "rewards/accuracies": 1.0, "rewards/chosen": 0.020806431770324707, "rewards/margins": 2.9122676849365234, "rewards/rejected": -2.891461133956909, "step": 2415 }, { "epoch": 0.28, "learning_rate": 2.197588669085801e-07, "logits/chosen": -3.503203868865967, "logits/rejected": -3.418900728225708, "logps/chosen": -294.357421875, "logps/rejected": -271.21673583984375, "loss": 0.7155, "rewards/accuracies": 0.5, "rewards/chosen": -0.6230675578117371, "rewards/margins": 0.2991870045661926, "rewards/rejected": -0.9222546219825745, "step": 2416 }, { "epoch": 0.28, "learning_rate": 2.1972375043895586e-07, "logits/chosen": -3.8274247646331787, "logits/rejected": -3.6736860275268555, "logps/chosen": -128.14199829101562, "logps/rejected": -233.97918701171875, "loss": 0.6057, "rewards/accuracies": 0.625, "rewards/chosen": -0.10199102759361267, "rewards/margins": 0.7906545400619507, "rewards/rejected": -0.8926455974578857, "step": 2417 }, { "epoch": 0.28, "learning_rate": 2.1968863396933158e-07, "logits/chosen": -3.103031635284424, "logits/rejected": -2.6888766288757324, "logps/chosen": -201.49899291992188, "logps/rejected": -184.32421875, "loss": 0.4075, "rewards/accuracies": 0.875, "rewards/chosen": -0.12238103151321411, "rewards/margins": 1.5899425745010376, "rewards/rejected": -1.7123236656188965, "step": 2418 }, { "epoch": 0.28, "learning_rate": 2.1965351749970736e-07, "logits/chosen": -3.2060046195983887, "logits/rejected": -3.0454280376434326, "logps/chosen": -264.29901123046875, "logps/rejected": -240.30360412597656, "loss": 0.2789, "rewards/accuracies": 0.875, "rewards/chosen": 0.15718016028404236, "rewards/margins": 1.744826316833496, "rewards/rejected": -1.5876461267471313, "step": 2419 }, { "epoch": 0.28, "learning_rate": 2.1961840103008312e-07, "logits/chosen": -3.383967399597168, "logits/rejected": -3.0167808532714844, "logps/chosen": -211.47976684570312, "logps/rejected": -127.46572875976562, "loss": 0.3933, "rewards/accuracies": 0.75, "rewards/chosen": -0.0747290700674057, "rewards/margins": 1.108471155166626, "rewards/rejected": -1.1832003593444824, "step": 2420 }, { "epoch": 0.28, "learning_rate": 2.1958328456045884e-07, "logits/chosen": -3.1186742782592773, "logits/rejected": -3.0124318599700928, "logps/chosen": -356.3381652832031, "logps/rejected": -297.26544189453125, "loss": 0.5971, "rewards/accuracies": 0.75, "rewards/chosen": 0.1529424637556076, "rewards/margins": 1.3905649185180664, "rewards/rejected": -1.2376224994659424, "step": 2421 }, { "epoch": 0.28, "learning_rate": 2.195481680908346e-07, "logits/chosen": -2.620532989501953, "logits/rejected": -3.1646366119384766, "logps/chosen": -335.073974609375, "logps/rejected": -406.078857421875, "loss": 0.1737, "rewards/accuracies": 1.0, "rewards/chosen": 0.00973445177078247, "rewards/margins": 2.3623414039611816, "rewards/rejected": -2.352606773376465, "step": 2422 }, { "epoch": 0.28, "learning_rate": 2.1951305162121033e-07, "logits/chosen": -2.7919023036956787, "logits/rejected": -3.311110258102417, "logps/chosen": -259.38714599609375, "logps/rejected": -214.59597778320312, "loss": 0.1789, "rewards/accuracies": 1.0, "rewards/chosen": 0.2546161711215973, "rewards/margins": 2.521509885787964, "rewards/rejected": -2.2668936252593994, "step": 2423 }, { "epoch": 0.28, "learning_rate": 2.1947793515158608e-07, "logits/chosen": -2.4544413089752197, "logits/rejected": -2.812197208404541, "logps/chosen": -226.77920532226562, "logps/rejected": -232.41847229003906, "loss": 0.2356, "rewards/accuracies": 1.0, "rewards/chosen": 0.029917694628238678, "rewards/margins": 1.5192826986312866, "rewards/rejected": -1.4893651008605957, "step": 2424 }, { "epoch": 0.28, "learning_rate": 2.1944281868196183e-07, "logits/chosen": -3.0819594860076904, "logits/rejected": -3.089066743850708, "logps/chosen": -261.36407470703125, "logps/rejected": -311.8839111328125, "loss": 0.2, "rewards/accuracies": 1.0, "rewards/chosen": -0.3528956174850464, "rewards/margins": 2.5575754642486572, "rewards/rejected": -2.9104714393615723, "step": 2425 }, { "epoch": 0.28, "learning_rate": 2.1940770221233756e-07, "logits/chosen": -4.018985748291016, "logits/rejected": -3.828914165496826, "logps/chosen": -456.403076171875, "logps/rejected": -353.1015930175781, "loss": 0.5421, "rewards/accuracies": 0.75, "rewards/chosen": -1.0994906425476074, "rewards/margins": 2.3149473667144775, "rewards/rejected": -3.414437770843506, "step": 2426 }, { "epoch": 0.28, "learning_rate": 2.1937258574271331e-07, "logits/chosen": -3.517800807952881, "logits/rejected": -3.415616035461426, "logps/chosen": -118.27099609375, "logps/rejected": -123.96674346923828, "loss": 0.5355, "rewards/accuracies": 0.625, "rewards/chosen": 0.4435257315635681, "rewards/margins": 0.7377454042434692, "rewards/rejected": -0.2942197024822235, "step": 2427 }, { "epoch": 0.28, "learning_rate": 2.193374692730891e-07, "logits/chosen": -2.7372374534606934, "logits/rejected": -2.770352602005005, "logps/chosen": -210.067138671875, "logps/rejected": -224.475341796875, "loss": 0.3228, "rewards/accuracies": 0.875, "rewards/chosen": 0.08157536387443542, "rewards/margins": 1.7329797744750977, "rewards/rejected": -1.6514043807983398, "step": 2428 }, { "epoch": 0.28, "learning_rate": 2.193023528034648e-07, "logits/chosen": -3.1419503688812256, "logits/rejected": -2.997148275375366, "logps/chosen": -153.3421173095703, "logps/rejected": -258.42919921875, "loss": 0.1555, "rewards/accuracies": 1.0, "rewards/chosen": -0.08342582732439041, "rewards/margins": 2.4668238162994385, "rewards/rejected": -2.5502495765686035, "step": 2429 }, { "epoch": 0.28, "learning_rate": 2.1926723633384058e-07, "logits/chosen": -3.320883274078369, "logits/rejected": -3.1587305068969727, "logps/chosen": -547.282470703125, "logps/rejected": -454.83868408203125, "loss": 0.321, "rewards/accuracies": 0.875, "rewards/chosen": -0.1741361767053604, "rewards/margins": 1.6035101413726807, "rewards/rejected": -1.7776463031768799, "step": 2430 }, { "epoch": 0.28, "learning_rate": 2.192321198642163e-07, "logits/chosen": -3.7074661254882812, "logits/rejected": -3.3746659755706787, "logps/chosen": -281.88360595703125, "logps/rejected": -231.26478576660156, "loss": 0.2027, "rewards/accuracies": 1.0, "rewards/chosen": -0.16728559136390686, "rewards/margins": 2.1524500846862793, "rewards/rejected": -2.319735527038574, "step": 2431 }, { "epoch": 0.28, "learning_rate": 2.1919700339459206e-07, "logits/chosen": -2.621852397918701, "logits/rejected": -2.738388776779175, "logps/chosen": -230.6024627685547, "logps/rejected": -217.95655822753906, "loss": 0.2405, "rewards/accuracies": 1.0, "rewards/chosen": 0.3766915798187256, "rewards/margins": 2.4472131729125977, "rewards/rejected": -2.070521354675293, "step": 2432 }, { "epoch": 0.28, "learning_rate": 2.191618869249678e-07, "logits/chosen": -2.5729453563690186, "logits/rejected": -2.510162353515625, "logps/chosen": -219.2705841064453, "logps/rejected": -253.13291931152344, "loss": 0.5897, "rewards/accuracies": 0.875, "rewards/chosen": -0.19528169929981232, "rewards/margins": 1.7140196561813354, "rewards/rejected": -1.9093014001846313, "step": 2433 }, { "epoch": 0.28, "learning_rate": 2.1912677045534354e-07, "logits/chosen": -3.5079660415649414, "logits/rejected": -3.80427885055542, "logps/chosen": -322.02587890625, "logps/rejected": -291.5752258300781, "loss": 0.4048, "rewards/accuracies": 0.875, "rewards/chosen": 0.36178719997406006, "rewards/margins": 1.2521203756332397, "rewards/rejected": -0.8903331756591797, "step": 2434 }, { "epoch": 0.28, "learning_rate": 2.190916539857193e-07, "logits/chosen": -3.756080389022827, "logits/rejected": -3.5474343299865723, "logps/chosen": -199.63638305664062, "logps/rejected": -111.511474609375, "loss": 0.4583, "rewards/accuracies": 0.75, "rewards/chosen": 0.256507933139801, "rewards/margins": 1.2519280910491943, "rewards/rejected": -0.9954202175140381, "step": 2435 }, { "epoch": 0.28, "learning_rate": 2.1905653751609505e-07, "logits/chosen": -3.2701361179351807, "logits/rejected": -2.8404221534729004, "logps/chosen": -204.3403778076172, "logps/rejected": -204.09979248046875, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -0.21677035093307495, "rewards/margins": 1.8074097633361816, "rewards/rejected": -2.0241804122924805, "step": 2436 }, { "epoch": 0.28, "learning_rate": 2.1902142104647077e-07, "logits/chosen": -3.6798810958862305, "logits/rejected": -3.082275867462158, "logps/chosen": -324.6055908203125, "logps/rejected": -257.0491943359375, "loss": 0.5478, "rewards/accuracies": 0.875, "rewards/chosen": 0.23082469403743744, "rewards/margins": 1.3557943105697632, "rewards/rejected": -1.1249696016311646, "step": 2437 }, { "epoch": 0.28, "learning_rate": 2.1898630457684653e-07, "logits/chosen": -3.4572887420654297, "logits/rejected": -3.2142372131347656, "logps/chosen": -216.39266967773438, "logps/rejected": -143.99981689453125, "loss": 0.4205, "rewards/accuracies": 0.75, "rewards/chosen": -0.13798195123672485, "rewards/margins": 1.1712979078292847, "rewards/rejected": -1.3092797994613647, "step": 2438 }, { "epoch": 0.28, "learning_rate": 2.1895118810722225e-07, "logits/chosen": -3.1805362701416016, "logits/rejected": -3.137821912765503, "logps/chosen": -320.88018798828125, "logps/rejected": -365.652099609375, "loss": 0.52, "rewards/accuracies": 0.875, "rewards/chosen": 0.0820050984621048, "rewards/margins": 2.076395034790039, "rewards/rejected": -1.9943897724151611, "step": 2439 }, { "epoch": 0.28, "learning_rate": 2.18916071637598e-07, "logits/chosen": -2.8748421669006348, "logits/rejected": -3.0962469577789307, "logps/chosen": -222.6476593017578, "logps/rejected": -240.46768188476562, "loss": 0.3569, "rewards/accuracies": 0.875, "rewards/chosen": 0.22386044263839722, "rewards/margins": 1.2036077976226807, "rewards/rejected": -0.9797472953796387, "step": 2440 }, { "epoch": 0.28, "learning_rate": 2.188809551679738e-07, "logits/chosen": -3.0261402130126953, "logits/rejected": -2.900965690612793, "logps/chosen": -222.828857421875, "logps/rejected": -256.79217529296875, "loss": 0.4532, "rewards/accuracies": 0.875, "rewards/chosen": 0.08372204750776291, "rewards/margins": 1.3232771158218384, "rewards/rejected": -1.2395551204681396, "step": 2441 }, { "epoch": 0.28, "learning_rate": 2.1884583869834952e-07, "logits/chosen": -2.801417589187622, "logits/rejected": -2.838425874710083, "logps/chosen": -157.430419921875, "logps/rejected": -155.1853485107422, "loss": 0.566, "rewards/accuracies": 0.625, "rewards/chosen": -0.3712718188762665, "rewards/margins": 0.6495455503463745, "rewards/rejected": -1.0208172798156738, "step": 2442 }, { "epoch": 0.28, "learning_rate": 2.1881072222872527e-07, "logits/chosen": -2.575927972793579, "logits/rejected": -2.4217116832733154, "logps/chosen": -248.05368041992188, "logps/rejected": -260.1338195800781, "loss": 0.5805, "rewards/accuracies": 0.5, "rewards/chosen": 0.12393037229776382, "rewards/margins": 0.6171920299530029, "rewards/rejected": -0.4932616651058197, "step": 2443 }, { "epoch": 0.28, "learning_rate": 2.1877560575910102e-07, "logits/chosen": -3.6244165897369385, "logits/rejected": -3.8091020584106445, "logps/chosen": -128.9793701171875, "logps/rejected": -192.08836364746094, "loss": 0.5929, "rewards/accuracies": 0.75, "rewards/chosen": -0.5581544637680054, "rewards/margins": 2.3534533977508545, "rewards/rejected": -2.9116082191467285, "step": 2444 }, { "epoch": 0.28, "learning_rate": 2.1874048928947675e-07, "logits/chosen": -3.1359071731567383, "logits/rejected": -2.9606525897979736, "logps/chosen": -403.9376525878906, "logps/rejected": -179.67617797851562, "loss": 0.2565, "rewards/accuracies": 1.0, "rewards/chosen": 0.5444480180740356, "rewards/margins": 1.719770073890686, "rewards/rejected": -1.1753219366073608, "step": 2445 }, { "epoch": 0.28, "learning_rate": 2.187053728198525e-07, "logits/chosen": -3.228257656097412, "logits/rejected": -3.000086545944214, "logps/chosen": -321.0558776855469, "logps/rejected": -194.30027770996094, "loss": 0.4433, "rewards/accuracies": 0.625, "rewards/chosen": 0.12248248606920242, "rewards/margins": 1.4330296516418457, "rewards/rejected": -1.3105472326278687, "step": 2446 }, { "epoch": 0.28, "learning_rate": 2.1867025635022823e-07, "logits/chosen": -3.75832462310791, "logits/rejected": -3.7247042655944824, "logps/chosen": -143.0338134765625, "logps/rejected": -206.94908142089844, "loss": 0.2749, "rewards/accuracies": 1.0, "rewards/chosen": 0.16749566793441772, "rewards/margins": 1.5264328718185425, "rewards/rejected": -1.3589372634887695, "step": 2447 }, { "epoch": 0.28, "learning_rate": 2.1863513988060399e-07, "logits/chosen": -3.0892677307128906, "logits/rejected": -3.06630539894104, "logps/chosen": -261.5441589355469, "logps/rejected": -205.29661560058594, "loss": 0.4242, "rewards/accuracies": 0.875, "rewards/chosen": -0.2856305241584778, "rewards/margins": 0.8872074484825134, "rewards/rejected": -1.1728379726409912, "step": 2448 }, { "epoch": 0.28, "learning_rate": 2.1860002341097974e-07, "logits/chosen": -3.215249538421631, "logits/rejected": -3.2620458602905273, "logps/chosen": -339.4765625, "logps/rejected": -211.56326293945312, "loss": 0.3774, "rewards/accuracies": 0.875, "rewards/chosen": -0.4658085107803345, "rewards/margins": 1.4360580444335938, "rewards/rejected": -1.9018666744232178, "step": 2449 }, { "epoch": 0.28, "learning_rate": 2.1856490694135547e-07, "logits/chosen": -2.947361469268799, "logits/rejected": -2.968222141265869, "logps/chosen": -328.7845764160156, "logps/rejected": -269.6783447265625, "loss": 0.3448, "rewards/accuracies": 0.75, "rewards/chosen": 0.3406258225440979, "rewards/margins": 2.0198113918304443, "rewards/rejected": -1.6791855096817017, "step": 2450 }, { "epoch": 0.28, "learning_rate": 2.1852979047173122e-07, "logits/chosen": -2.203819513320923, "logits/rejected": -2.1777544021606445, "logps/chosen": -308.4319763183594, "logps/rejected": -300.3418884277344, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": 0.251875638961792, "rewards/margins": 1.6042571067810059, "rewards/rejected": -1.3523813486099243, "step": 2451 }, { "epoch": 0.28, "learning_rate": 2.18494674002107e-07, "logits/chosen": -3.14052414894104, "logits/rejected": -3.276562213897705, "logps/chosen": -223.97531127929688, "logps/rejected": -298.59765625, "loss": 0.4943, "rewards/accuracies": 0.75, "rewards/chosen": -0.06599549949169159, "rewards/margins": 1.4284056425094604, "rewards/rejected": -1.4944013357162476, "step": 2452 }, { "epoch": 0.28, "learning_rate": 2.1845955753248273e-07, "logits/chosen": -2.912686347961426, "logits/rejected": -3.2183055877685547, "logps/chosen": -120.59547424316406, "logps/rejected": -198.07168579101562, "loss": 0.3565, "rewards/accuracies": 0.75, "rewards/chosen": -0.3066108226776123, "rewards/margins": 2.31168270111084, "rewards/rejected": -2.618293285369873, "step": 2453 }, { "epoch": 0.28, "learning_rate": 2.1842444106285848e-07, "logits/chosen": -2.7521955966949463, "logits/rejected": -3.034346342086792, "logps/chosen": -332.7185974121094, "logps/rejected": -376.32928466796875, "loss": 0.4256, "rewards/accuracies": 0.75, "rewards/chosen": 0.07439442723989487, "rewards/margins": 1.4981579780578613, "rewards/rejected": -1.4237632751464844, "step": 2454 }, { "epoch": 0.28, "learning_rate": 2.183893245932342e-07, "logits/chosen": -2.552117109298706, "logits/rejected": -2.67108416557312, "logps/chosen": -236.9710693359375, "logps/rejected": -329.45623779296875, "loss": 0.5294, "rewards/accuracies": 0.5, "rewards/chosen": 0.021329151466488838, "rewards/margins": 1.016088843345642, "rewards/rejected": -0.9947596788406372, "step": 2455 }, { "epoch": 0.28, "learning_rate": 2.1835420812360996e-07, "logits/chosen": -3.25130033493042, "logits/rejected": -3.200017213821411, "logps/chosen": -194.1268310546875, "logps/rejected": -305.9853210449219, "loss": 0.4243, "rewards/accuracies": 0.875, "rewards/chosen": -0.3273342251777649, "rewards/margins": 2.5751430988311768, "rewards/rejected": -2.902477264404297, "step": 2456 }, { "epoch": 0.28, "learning_rate": 2.1831909165398572e-07, "logits/chosen": -3.3787519931793213, "logits/rejected": -3.4744396209716797, "logps/chosen": -180.8157196044922, "logps/rejected": -131.29067993164062, "loss": 0.3407, "rewards/accuracies": 1.0, "rewards/chosen": -0.07379353046417236, "rewards/margins": 1.1642754077911377, "rewards/rejected": -1.2380690574645996, "step": 2457 }, { "epoch": 0.28, "learning_rate": 2.1828397518436145e-07, "logits/chosen": -2.6219289302825928, "logits/rejected": -2.8350653648376465, "logps/chosen": -247.43992614746094, "logps/rejected": -219.67323303222656, "loss": 0.4002, "rewards/accuracies": 0.75, "rewards/chosen": -0.08709601312875748, "rewards/margins": 1.2533464431762695, "rewards/rejected": -1.340442419052124, "step": 2458 }, { "epoch": 0.28, "learning_rate": 2.182488587147372e-07, "logits/chosen": -2.7775309085845947, "logits/rejected": -2.63043475151062, "logps/chosen": -422.1219482421875, "logps/rejected": -252.15554809570312, "loss": 0.2352, "rewards/accuracies": 1.0, "rewards/chosen": -0.019235610961914062, "rewards/margins": 2.149407386779785, "rewards/rejected": -2.1686432361602783, "step": 2459 }, { "epoch": 0.28, "learning_rate": 2.1821374224511295e-07, "logits/chosen": -4.210098743438721, "logits/rejected": -4.035314083099365, "logps/chosen": -223.18902587890625, "logps/rejected": -210.02633666992188, "loss": 0.5007, "rewards/accuracies": 0.75, "rewards/chosen": 0.06098141521215439, "rewards/margins": 1.4619140625, "rewards/rejected": -1.4009325504302979, "step": 2460 }, { "epoch": 0.28, "learning_rate": 2.1817862577548868e-07, "logits/chosen": -3.3811802864074707, "logits/rejected": -3.4701714515686035, "logps/chosen": -181.46426391601562, "logps/rejected": -219.365966796875, "loss": 0.2773, "rewards/accuracies": 0.875, "rewards/chosen": 0.09229210019111633, "rewards/margins": 2.1939291954040527, "rewards/rejected": -2.1016368865966797, "step": 2461 }, { "epoch": 0.28, "learning_rate": 2.1814350930586446e-07, "logits/chosen": -2.968677520751953, "logits/rejected": -2.926361083984375, "logps/chosen": -263.7087707519531, "logps/rejected": -269.3233337402344, "loss": 0.7407, "rewards/accuracies": 0.625, "rewards/chosen": -0.7600381374359131, "rewards/margins": 0.8985783457756042, "rewards/rejected": -1.6586166620254517, "step": 2462 }, { "epoch": 0.28, "learning_rate": 2.1810839283624016e-07, "logits/chosen": -3.7632837295532227, "logits/rejected": -3.830670118331909, "logps/chosen": -388.6455383300781, "logps/rejected": -365.0164794921875, "loss": 0.4336, "rewards/accuracies": 0.875, "rewards/chosen": -0.449359655380249, "rewards/margins": 1.8442012071609497, "rewards/rejected": -2.293560743331909, "step": 2463 }, { "epoch": 0.28, "learning_rate": 2.1807327636661594e-07, "logits/chosen": -3.249234676361084, "logits/rejected": -3.379873514175415, "logps/chosen": -388.23150634765625, "logps/rejected": -273.75994873046875, "loss": 0.5919, "rewards/accuracies": 0.75, "rewards/chosen": 0.200859934091568, "rewards/margins": 0.6882392168045044, "rewards/rejected": -0.48737916350364685, "step": 2464 }, { "epoch": 0.28, "learning_rate": 2.180381598969917e-07, "logits/chosen": -3.820143699645996, "logits/rejected": -3.7728981971740723, "logps/chosen": -181.84933471679688, "logps/rejected": -76.54154968261719, "loss": 0.8428, "rewards/accuracies": 0.625, "rewards/chosen": -0.39957424998283386, "rewards/margins": 0.039460718631744385, "rewards/rejected": -0.43903499841690063, "step": 2465 }, { "epoch": 0.28, "learning_rate": 2.1800304342736742e-07, "logits/chosen": -3.8907947540283203, "logits/rejected": -3.780057430267334, "logps/chosen": -182.345947265625, "logps/rejected": -196.57447814941406, "loss": 0.5712, "rewards/accuracies": 0.625, "rewards/chosen": 0.20988132059574127, "rewards/margins": 1.065144419670105, "rewards/rejected": -0.8552630543708801, "step": 2466 }, { "epoch": 0.28, "learning_rate": 2.1796792695774318e-07, "logits/chosen": -3.7114272117614746, "logits/rejected": -3.409396171569824, "logps/chosen": -229.558837890625, "logps/rejected": -260.908203125, "loss": 0.4049, "rewards/accuracies": 0.875, "rewards/chosen": -0.011846736073493958, "rewards/margins": 2.6365349292755127, "rewards/rejected": -2.6483817100524902, "step": 2467 }, { "epoch": 0.28, "learning_rate": 2.1793281048811893e-07, "logits/chosen": -3.204981803894043, "logits/rejected": -3.170121431350708, "logps/chosen": -205.509033203125, "logps/rejected": -313.5057678222656, "loss": 0.3047, "rewards/accuracies": 0.875, "rewards/chosen": 0.0013806819915771484, "rewards/margins": 1.938448190689087, "rewards/rejected": -1.9370676279067993, "step": 2468 }, { "epoch": 0.28, "learning_rate": 2.1789769401849466e-07, "logits/chosen": -2.6218063831329346, "logits/rejected": -2.562359094619751, "logps/chosen": -334.5001220703125, "logps/rejected": -213.37925720214844, "loss": 0.7113, "rewards/accuracies": 0.625, "rewards/chosen": -0.808554470539093, "rewards/margins": 0.41915979981422424, "rewards/rejected": -1.2277144193649292, "step": 2469 }, { "epoch": 0.28, "learning_rate": 2.178625775488704e-07, "logits/chosen": -3.3911120891571045, "logits/rejected": -3.8220114707946777, "logps/chosen": -256.6356506347656, "logps/rejected": -254.3651580810547, "loss": 0.2172, "rewards/accuracies": 0.875, "rewards/chosen": 0.04758182168006897, "rewards/margins": 2.3955259323120117, "rewards/rejected": -2.3479440212249756, "step": 2470 }, { "epoch": 0.28, "learning_rate": 2.1782746107924614e-07, "logits/chosen": -2.9821865558624268, "logits/rejected": -2.9906139373779297, "logps/chosen": -305.24993896484375, "logps/rejected": -231.15438842773438, "loss": 0.267, "rewards/accuracies": 1.0, "rewards/chosen": -0.39678874611854553, "rewards/margins": 1.6343164443969727, "rewards/rejected": -2.0311052799224854, "step": 2471 }, { "epoch": 0.28, "learning_rate": 2.177923446096219e-07, "logits/chosen": -2.735107660293579, "logits/rejected": -2.468890905380249, "logps/chosen": -213.21627807617188, "logps/rejected": -118.34982299804688, "loss": 0.8883, "rewards/accuracies": 0.5, "rewards/chosen": -0.5310587882995605, "rewards/margins": -0.013679593801498413, "rewards/rejected": -0.5173791646957397, "step": 2472 }, { "epoch": 0.29, "learning_rate": 2.1775722813999767e-07, "logits/chosen": -3.4001710414886475, "logits/rejected": -3.059512138366699, "logps/chosen": -285.00543212890625, "logps/rejected": -217.40570068359375, "loss": 0.3331, "rewards/accuracies": 0.75, "rewards/chosen": -0.12728194892406464, "rewards/margins": 1.8616950511932373, "rewards/rejected": -1.9889768362045288, "step": 2473 }, { "epoch": 0.29, "learning_rate": 2.1772211167037337e-07, "logits/chosen": -3.1767196655273438, "logits/rejected": -3.449080467224121, "logps/chosen": -263.1763610839844, "logps/rejected": -282.4006042480469, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": 0.54848313331604, "rewards/margins": 3.045677423477173, "rewards/rejected": -2.497194290161133, "step": 2474 }, { "epoch": 0.29, "learning_rate": 2.1768699520074916e-07, "logits/chosen": -3.092620849609375, "logits/rejected": -3.1552720069885254, "logps/chosen": -192.42620849609375, "logps/rejected": -260.7003173828125, "loss": 0.3234, "rewards/accuracies": 0.875, "rewards/chosen": 0.18665088713169098, "rewards/margins": 2.06071138381958, "rewards/rejected": -1.8740603923797607, "step": 2475 }, { "epoch": 0.29, "learning_rate": 2.1765187873112488e-07, "logits/chosen": -2.7265820503234863, "logits/rejected": -2.891597270965576, "logps/chosen": -316.4152526855469, "logps/rejected": -281.15380859375, "loss": 0.5573, "rewards/accuracies": 0.625, "rewards/chosen": -0.40571850538253784, "rewards/margins": 1.1902090311050415, "rewards/rejected": -1.5959275960922241, "step": 2476 }, { "epoch": 0.29, "learning_rate": 2.1761676226150064e-07, "logits/chosen": -2.54168701171875, "logits/rejected": -2.733599901199341, "logps/chosen": -228.31362915039062, "logps/rejected": -147.59320068359375, "loss": 0.4779, "rewards/accuracies": 0.625, "rewards/chosen": 0.3791736364364624, "rewards/margins": 1.1870970726013184, "rewards/rejected": -0.807923436164856, "step": 2477 }, { "epoch": 0.29, "learning_rate": 2.175816457918764e-07, "logits/chosen": -2.7614684104919434, "logits/rejected": -2.716972827911377, "logps/chosen": -578.1192016601562, "logps/rejected": -459.1675720214844, "loss": 0.3616, "rewards/accuracies": 0.875, "rewards/chosen": 0.03346632421016693, "rewards/margins": 2.1798722743988037, "rewards/rejected": -2.1464059352874756, "step": 2478 }, { "epoch": 0.29, "learning_rate": 2.1754652932225212e-07, "logits/chosen": -3.615689992904663, "logits/rejected": -3.5108532905578613, "logps/chosen": -226.25509643554688, "logps/rejected": -274.2037353515625, "loss": 0.3841, "rewards/accuracies": 0.75, "rewards/chosen": -0.3713642358779907, "rewards/margins": 3.025009870529175, "rewards/rejected": -3.396373987197876, "step": 2479 }, { "epoch": 0.29, "learning_rate": 2.1751141285262787e-07, "logits/chosen": -2.46044659614563, "logits/rejected": -2.639129638671875, "logps/chosen": -433.7906494140625, "logps/rejected": -299.1061096191406, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": -0.10184717178344727, "rewards/margins": 1.8733896017074585, "rewards/rejected": -1.9752366542816162, "step": 2480 }, { "epoch": 0.29, "learning_rate": 2.1747629638300363e-07, "logits/chosen": -3.120635986328125, "logits/rejected": -3.331043004989624, "logps/chosen": -377.9277038574219, "logps/rejected": -202.9185791015625, "loss": 0.3452, "rewards/accuracies": 0.875, "rewards/chosen": -0.18138794600963593, "rewards/margins": 1.8247970342636108, "rewards/rejected": -2.0061850547790527, "step": 2481 }, { "epoch": 0.29, "learning_rate": 2.1744117991337935e-07, "logits/chosen": -3.5460946559906006, "logits/rejected": -3.69732666015625, "logps/chosen": -355.28875732421875, "logps/rejected": -326.8369140625, "loss": 0.6411, "rewards/accuracies": 0.75, "rewards/chosen": -0.4959222972393036, "rewards/margins": 0.5692657232284546, "rewards/rejected": -1.065187931060791, "step": 2482 }, { "epoch": 0.29, "learning_rate": 2.174060634437551e-07, "logits/chosen": -2.488938570022583, "logits/rejected": -2.4261369705200195, "logps/chosen": -467.16400146484375, "logps/rejected": -316.6492004394531, "loss": 0.6097, "rewards/accuracies": 0.75, "rewards/chosen": -0.1638650894165039, "rewards/margins": 0.9151331186294556, "rewards/rejected": -1.0789982080459595, "step": 2483 }, { "epoch": 0.29, "learning_rate": 2.1737094697413083e-07, "logits/chosen": -2.5669665336608887, "logits/rejected": -2.4397716522216797, "logps/chosen": -287.9142761230469, "logps/rejected": -308.5404052734375, "loss": 0.6615, "rewards/accuracies": 0.875, "rewards/chosen": -0.30597782135009766, "rewards/margins": 0.6199995279312134, "rewards/rejected": -0.925977349281311, "step": 2484 }, { "epoch": 0.29, "learning_rate": 2.173358305045066e-07, "logits/chosen": -2.5982868671417236, "logits/rejected": -2.5021119117736816, "logps/chosen": -132.1014404296875, "logps/rejected": -238.27015686035156, "loss": 0.3, "rewards/accuracies": 0.875, "rewards/chosen": 0.17167960107326508, "rewards/margins": 1.7378793954849243, "rewards/rejected": -1.5662000179290771, "step": 2485 }, { "epoch": 0.29, "learning_rate": 2.1730071403488237e-07, "logits/chosen": -3.404895782470703, "logits/rejected": -3.0755972862243652, "logps/chosen": -208.4241485595703, "logps/rejected": -139.91775512695312, "loss": 0.4817, "rewards/accuracies": 0.875, "rewards/chosen": -0.025990381836891174, "rewards/margins": 0.827010989189148, "rewards/rejected": -0.8530014157295227, "step": 2486 }, { "epoch": 0.29, "learning_rate": 2.172655975652581e-07, "logits/chosen": -2.539649248123169, "logits/rejected": -2.4222071170806885, "logps/chosen": -140.99176025390625, "logps/rejected": -267.51507568359375, "loss": 0.3436, "rewards/accuracies": 0.875, "rewards/chosen": -0.1797463446855545, "rewards/margins": 1.2750368118286133, "rewards/rejected": -1.4547832012176514, "step": 2487 }, { "epoch": 0.29, "learning_rate": 2.1723048109563385e-07, "logits/chosen": -2.7007298469543457, "logits/rejected": -2.281275510787964, "logps/chosen": -345.035400390625, "logps/rejected": -217.8950653076172, "loss": 0.402, "rewards/accuracies": 0.875, "rewards/chosen": 0.011309966444969177, "rewards/margins": 1.118504524230957, "rewards/rejected": -1.1071945428848267, "step": 2488 }, { "epoch": 0.29, "learning_rate": 2.171953646260096e-07, "logits/chosen": -2.99935245513916, "logits/rejected": -2.631479263305664, "logps/chosen": -305.01983642578125, "logps/rejected": -213.12973022460938, "loss": 0.6579, "rewards/accuracies": 0.75, "rewards/chosen": 0.11633267253637314, "rewards/margins": 1.1751036643981934, "rewards/rejected": -1.058771014213562, "step": 2489 }, { "epoch": 0.29, "learning_rate": 2.1716024815638533e-07, "logits/chosen": -3.290658473968506, "logits/rejected": -3.1096949577331543, "logps/chosen": -199.30447387695312, "logps/rejected": -240.88946533203125, "loss": 0.241, "rewards/accuracies": 0.875, "rewards/chosen": 0.1683589518070221, "rewards/margins": 1.7305457592010498, "rewards/rejected": -1.5621867179870605, "step": 2490 }, { "epoch": 0.29, "learning_rate": 2.1712513168676108e-07, "logits/chosen": -2.925703525543213, "logits/rejected": -2.7297894954681396, "logps/chosen": -385.4110412597656, "logps/rejected": -379.5626220703125, "loss": 0.2164, "rewards/accuracies": 1.0, "rewards/chosen": 0.10115164518356323, "rewards/margins": 2.2671008110046387, "rewards/rejected": -2.1659491062164307, "step": 2491 }, { "epoch": 0.29, "learning_rate": 2.170900152171368e-07, "logits/chosen": -2.33902645111084, "logits/rejected": -2.3498055934906006, "logps/chosen": -318.0898132324219, "logps/rejected": -296.12347412109375, "loss": 0.6364, "rewards/accuracies": 0.5, "rewards/chosen": -0.2034817636013031, "rewards/margins": 0.799735426902771, "rewards/rejected": -1.003217101097107, "step": 2492 }, { "epoch": 0.29, "learning_rate": 2.1705489874751257e-07, "logits/chosen": -2.928410530090332, "logits/rejected": -3.112292766571045, "logps/chosen": -183.60125732421875, "logps/rejected": -200.45562744140625, "loss": 0.411, "rewards/accuracies": 0.75, "rewards/chosen": -0.17708800733089447, "rewards/margins": 1.5838189125061035, "rewards/rejected": -1.7609069347381592, "step": 2493 }, { "epoch": 0.29, "learning_rate": 2.1701978227788832e-07, "logits/chosen": -2.3980016708374023, "logits/rejected": -2.6251823902130127, "logps/chosen": -479.4037170410156, "logps/rejected": -382.66455078125, "loss": 0.4058, "rewards/accuracies": 0.875, "rewards/chosen": -0.49874454736709595, "rewards/margins": 1.3575128316879272, "rewards/rejected": -1.856257438659668, "step": 2494 }, { "epoch": 0.29, "learning_rate": 2.1698466580826405e-07, "logits/chosen": -2.6151421070098877, "logits/rejected": -2.9194958209991455, "logps/chosen": -319.22930908203125, "logps/rejected": -308.5216369628906, "loss": 0.5001, "rewards/accuracies": 0.75, "rewards/chosen": 0.06488540023565292, "rewards/margins": 0.6228733062744141, "rewards/rejected": -0.5579878687858582, "step": 2495 }, { "epoch": 0.29, "learning_rate": 2.1694954933863983e-07, "logits/chosen": -3.4282264709472656, "logits/rejected": -3.456735134124756, "logps/chosen": -153.66282653808594, "logps/rejected": -176.79000854492188, "loss": 0.6492, "rewards/accuracies": 0.625, "rewards/chosen": -0.5922830104827881, "rewards/margins": 0.6304630041122437, "rewards/rejected": -1.2227461338043213, "step": 2496 }, { "epoch": 0.29, "learning_rate": 2.1691443286901558e-07, "logits/chosen": -3.7550277709960938, "logits/rejected": -3.6395514011383057, "logps/chosen": -533.3838500976562, "logps/rejected": -313.1332702636719, "loss": 0.3975, "rewards/accuracies": 0.75, "rewards/chosen": -0.5365990996360779, "rewards/margins": 1.555617332458496, "rewards/rejected": -2.0922164916992188, "step": 2497 }, { "epoch": 0.29, "learning_rate": 2.168793163993913e-07, "logits/chosen": -3.8064944744110107, "logits/rejected": -4.0364532470703125, "logps/chosen": -105.87013244628906, "logps/rejected": -229.16986083984375, "loss": 0.2893, "rewards/accuracies": 0.875, "rewards/chosen": 0.17462556064128876, "rewards/margins": 3.602158784866333, "rewards/rejected": -3.4275331497192383, "step": 2498 }, { "epoch": 0.29, "learning_rate": 2.1684419992976706e-07, "logits/chosen": -3.3039333820343018, "logits/rejected": -3.3841021060943604, "logps/chosen": -190.86904907226562, "logps/rejected": -275.48675537109375, "loss": 0.1699, "rewards/accuracies": 1.0, "rewards/chosen": -0.039414308965206146, "rewards/margins": 3.3313801288604736, "rewards/rejected": -3.3707942962646484, "step": 2499 }, { "epoch": 0.29, "learning_rate": 2.168090834601428e-07, "logits/chosen": -3.6833810806274414, "logits/rejected": -3.439293622970581, "logps/chosen": -252.95263671875, "logps/rejected": -200.35391235351562, "loss": 0.5439, "rewards/accuracies": 0.625, "rewards/chosen": 0.06675035506486893, "rewards/margins": 0.9833582043647766, "rewards/rejected": -0.9166078567504883, "step": 2500 }, { "epoch": 0.29, "learning_rate": 2.1677396699051854e-07, "logits/chosen": -2.9705872535705566, "logits/rejected": -2.7476954460144043, "logps/chosen": -172.15023803710938, "logps/rejected": -202.17086791992188, "loss": 0.4181, "rewards/accuracies": 0.75, "rewards/chosen": -0.3489150404930115, "rewards/margins": 1.3464860916137695, "rewards/rejected": -1.6954011917114258, "step": 2501 }, { "epoch": 0.29, "learning_rate": 2.167388505208943e-07, "logits/chosen": -3.088698148727417, "logits/rejected": -3.155203342437744, "logps/chosen": -329.0819396972656, "logps/rejected": -288.0634765625, "loss": 0.63, "rewards/accuracies": 0.75, "rewards/chosen": -0.053108327090740204, "rewards/margins": 0.7766106128692627, "rewards/rejected": -0.8297189474105835, "step": 2502 }, { "epoch": 0.29, "learning_rate": 2.1670373405127002e-07, "logits/chosen": -2.687331199645996, "logits/rejected": -2.371080160140991, "logps/chosen": -304.59906005859375, "logps/rejected": -429.2099304199219, "loss": 0.3054, "rewards/accuracies": 0.75, "rewards/chosen": -0.010310269892215729, "rewards/margins": 2.3503966331481934, "rewards/rejected": -2.3607070446014404, "step": 2503 }, { "epoch": 0.29, "learning_rate": 2.1666861758164578e-07, "logits/chosen": -3.417501449584961, "logits/rejected": -3.6031932830810547, "logps/chosen": -85.20997619628906, "logps/rejected": -220.92807006835938, "loss": 0.265, "rewards/accuracies": 1.0, "rewards/chosen": 0.36196520924568176, "rewards/margins": 2.267026424407959, "rewards/rejected": -1.9050612449645996, "step": 2504 }, { "epoch": 0.29, "learning_rate": 2.1663350111202153e-07, "logits/chosen": -2.596254825592041, "logits/rejected": -2.492746591567993, "logps/chosen": -122.69634246826172, "logps/rejected": -126.95936584472656, "loss": 0.5136, "rewards/accuracies": 0.625, "rewards/chosen": 0.0050395820289850235, "rewards/margins": 0.6098289489746094, "rewards/rejected": -0.6047893762588501, "step": 2505 }, { "epoch": 0.29, "learning_rate": 2.1659838464239726e-07, "logits/chosen": -3.1768832206726074, "logits/rejected": -3.516199827194214, "logps/chosen": -301.1124572753906, "logps/rejected": -347.6476745605469, "loss": 0.3775, "rewards/accuracies": 0.875, "rewards/chosen": -0.050177350640296936, "rewards/margins": 2.3043107986450195, "rewards/rejected": -2.3544881343841553, "step": 2506 }, { "epoch": 0.29, "learning_rate": 2.1656326817277304e-07, "logits/chosen": -2.9308624267578125, "logits/rejected": -3.1785178184509277, "logps/chosen": -156.17010498046875, "logps/rejected": -347.2279052734375, "loss": 0.2015, "rewards/accuracies": 0.875, "rewards/chosen": -0.22914133965969086, "rewards/margins": 2.547045946121216, "rewards/rejected": -2.7761874198913574, "step": 2507 }, { "epoch": 0.29, "learning_rate": 2.1652815170314874e-07, "logits/chosen": -2.790318727493286, "logits/rejected": -3.004650354385376, "logps/chosen": -352.009521484375, "logps/rejected": -352.0276794433594, "loss": 0.6534, "rewards/accuracies": 0.625, "rewards/chosen": -0.46743714809417725, "rewards/margins": 0.6324673891067505, "rewards/rejected": -1.0999045372009277, "step": 2508 }, { "epoch": 0.29, "learning_rate": 2.1649303523352452e-07, "logits/chosen": -3.4628429412841797, "logits/rejected": -3.037594795227051, "logps/chosen": -315.0083312988281, "logps/rejected": -223.48208618164062, "loss": 0.6253, "rewards/accuracies": 0.625, "rewards/chosen": -0.1919640600681305, "rewards/margins": 1.0366277694702148, "rewards/rejected": -1.2285919189453125, "step": 2509 }, { "epoch": 0.29, "learning_rate": 2.1645791876390028e-07, "logits/chosen": -3.2440714836120605, "logits/rejected": -3.1097850799560547, "logps/chosen": -363.18780517578125, "logps/rejected": -280.4508056640625, "loss": 0.4513, "rewards/accuracies": 0.875, "rewards/chosen": 0.038079455494880676, "rewards/margins": 1.7575528621673584, "rewards/rejected": -1.7194733619689941, "step": 2510 }, { "epoch": 0.29, "learning_rate": 2.16422802294276e-07, "logits/chosen": -3.711042642593384, "logits/rejected": -3.768460750579834, "logps/chosen": -161.98663330078125, "logps/rejected": -228.379150390625, "loss": 0.4019, "rewards/accuracies": 0.875, "rewards/chosen": -0.23478330671787262, "rewards/margins": 1.4729939699172974, "rewards/rejected": -1.7077771425247192, "step": 2511 }, { "epoch": 0.29, "learning_rate": 2.1638768582465176e-07, "logits/chosen": -2.934598684310913, "logits/rejected": -3.1725707054138184, "logps/chosen": -226.66461181640625, "logps/rejected": -152.4705352783203, "loss": 0.3879, "rewards/accuracies": 0.875, "rewards/chosen": 0.07610709220170975, "rewards/margins": 1.4340448379516602, "rewards/rejected": -1.3579376935958862, "step": 2512 }, { "epoch": 0.29, "learning_rate": 2.163525693550275e-07, "logits/chosen": -3.305607795715332, "logits/rejected": -3.9097390174865723, "logps/chosen": -218.16860961914062, "logps/rejected": -312.2205810546875, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": 0.045376554131507874, "rewards/margins": 1.9525346755981445, "rewards/rejected": -1.9071580171585083, "step": 2513 }, { "epoch": 0.29, "learning_rate": 2.1631745288540324e-07, "logits/chosen": -3.5929651260375977, "logits/rejected": -3.5992867946624756, "logps/chosen": -285.7209167480469, "logps/rejected": -367.5343933105469, "loss": 0.2158, "rewards/accuracies": 1.0, "rewards/chosen": 0.06939283758401871, "rewards/margins": 2.251302719116211, "rewards/rejected": -2.1819100379943848, "step": 2514 }, { "epoch": 0.29, "learning_rate": 2.16282336415779e-07, "logits/chosen": -3.190619468688965, "logits/rejected": -3.1460177898406982, "logps/chosen": -171.0941925048828, "logps/rejected": -163.57052612304688, "loss": 0.3984, "rewards/accuracies": 0.75, "rewards/chosen": -0.4423140287399292, "rewards/margins": 1.649274230003357, "rewards/rejected": -2.0915884971618652, "step": 2515 }, { "epoch": 0.29, "learning_rate": 2.1624721994615472e-07, "logits/chosen": -2.877197742462158, "logits/rejected": -2.999577045440674, "logps/chosen": -216.1131134033203, "logps/rejected": -295.2169189453125, "loss": 0.4658, "rewards/accuracies": 0.75, "rewards/chosen": 0.03068036586046219, "rewards/margins": 1.2160348892211914, "rewards/rejected": -1.185354471206665, "step": 2516 }, { "epoch": 0.29, "learning_rate": 2.1621210347653047e-07, "logits/chosen": -3.5639023780822754, "logits/rejected": -3.7309510707855225, "logps/chosen": -276.4259338378906, "logps/rejected": -268.25958251953125, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": 0.48831528425216675, "rewards/margins": 2.5214827060699463, "rewards/rejected": -2.0331673622131348, "step": 2517 }, { "epoch": 0.29, "learning_rate": 2.1617698700690625e-07, "logits/chosen": -2.5635616779327393, "logits/rejected": -2.519801139831543, "logps/chosen": -291.76727294921875, "logps/rejected": -278.26409912109375, "loss": 0.5788, "rewards/accuracies": 0.75, "rewards/chosen": -0.5102211833000183, "rewards/margins": 0.7334524393081665, "rewards/rejected": -1.24367356300354, "step": 2518 }, { "epoch": 0.29, "learning_rate": 2.1614187053728195e-07, "logits/chosen": -3.2372398376464844, "logits/rejected": -3.210988759994507, "logps/chosen": -240.8280029296875, "logps/rejected": -197.1525421142578, "loss": 0.4792, "rewards/accuracies": 0.875, "rewards/chosen": -0.21338310837745667, "rewards/margins": 1.8720418214797974, "rewards/rejected": -2.0854249000549316, "step": 2519 }, { "epoch": 0.29, "learning_rate": 2.1610675406765773e-07, "logits/chosen": -3.1961827278137207, "logits/rejected": -3.081766366958618, "logps/chosen": -327.99652099609375, "logps/rejected": -204.96917724609375, "loss": 0.5202, "rewards/accuracies": 0.75, "rewards/chosen": -0.08068427443504333, "rewards/margins": 0.7461433410644531, "rewards/rejected": -0.8268276453018188, "step": 2520 }, { "epoch": 0.29, "learning_rate": 2.1607163759803346e-07, "logits/chosen": -2.6684317588806152, "logits/rejected": -2.4371871948242188, "logps/chosen": -339.41448974609375, "logps/rejected": -303.41058349609375, "loss": 0.472, "rewards/accuracies": 0.875, "rewards/chosen": -0.00563429668545723, "rewards/margins": 1.2148531675338745, "rewards/rejected": -1.2204875946044922, "step": 2521 }, { "epoch": 0.29, "learning_rate": 2.1603652112840922e-07, "logits/chosen": -2.910371780395508, "logits/rejected": -3.216799736022949, "logps/chosen": -251.15113830566406, "logps/rejected": -401.2746276855469, "loss": 0.3364, "rewards/accuracies": 0.75, "rewards/chosen": 0.4468386173248291, "rewards/margins": 2.117999792098999, "rewards/rejected": -1.67116117477417, "step": 2522 }, { "epoch": 0.29, "learning_rate": 2.1600140465878497e-07, "logits/chosen": -2.570773124694824, "logits/rejected": -2.95436692237854, "logps/chosen": -238.2709503173828, "logps/rejected": -237.205322265625, "loss": 0.183, "rewards/accuracies": 1.0, "rewards/chosen": 0.8135675191879272, "rewards/margins": 2.1649529933929443, "rewards/rejected": -1.3513855934143066, "step": 2523 }, { "epoch": 0.29, "learning_rate": 2.159662881891607e-07, "logits/chosen": -3.20365834236145, "logits/rejected": -3.417567491531372, "logps/chosen": -128.83523559570312, "logps/rejected": -233.34886169433594, "loss": 0.3022, "rewards/accuracies": 1.0, "rewards/chosen": 0.24098876118659973, "rewards/margins": 2.528226375579834, "rewards/rejected": -2.2872376441955566, "step": 2524 }, { "epoch": 0.29, "learning_rate": 2.1593117171953645e-07, "logits/chosen": -3.1513779163360596, "logits/rejected": -3.181032180786133, "logps/chosen": -270.15362548828125, "logps/rejected": -331.47271728515625, "loss": 0.4683, "rewards/accuracies": 0.75, "rewards/chosen": -0.9286358952522278, "rewards/margins": 2.737130641937256, "rewards/rejected": -3.665766716003418, "step": 2525 }, { "epoch": 0.29, "learning_rate": 2.158960552499122e-07, "logits/chosen": -2.8657476902008057, "logits/rejected": -2.9526169300079346, "logps/chosen": -357.7826843261719, "logps/rejected": -350.6910400390625, "loss": 0.3207, "rewards/accuracies": 0.875, "rewards/chosen": 0.5264121294021606, "rewards/margins": 1.7213901281356812, "rewards/rejected": -1.19497811794281, "step": 2526 }, { "epoch": 0.29, "learning_rate": 2.1586093878028793e-07, "logits/chosen": -2.9215714931488037, "logits/rejected": -2.723855972290039, "logps/chosen": -175.39907836914062, "logps/rejected": -188.17745971679688, "loss": 0.4783, "rewards/accuracies": 0.75, "rewards/chosen": -0.34966182708740234, "rewards/margins": 0.9792813658714294, "rewards/rejected": -1.3289430141448975, "step": 2527 }, { "epoch": 0.29, "learning_rate": 2.1582582231066369e-07, "logits/chosen": -2.8442330360412598, "logits/rejected": -2.813645839691162, "logps/chosen": -155.81375122070312, "logps/rejected": -274.1573791503906, "loss": 0.4808, "rewards/accuracies": 0.75, "rewards/chosen": -0.5195708870887756, "rewards/margins": 1.7433580160140991, "rewards/rejected": -2.2629289627075195, "step": 2528 }, { "epoch": 0.29, "learning_rate": 2.1579070584103941e-07, "logits/chosen": -3.446017265319824, "logits/rejected": -3.0477077960968018, "logps/chosen": -240.87806701660156, "logps/rejected": -238.14952087402344, "loss": 0.5247, "rewards/accuracies": 0.875, "rewards/chosen": -0.6172577142715454, "rewards/margins": 0.8756598234176636, "rewards/rejected": -1.492917537689209, "step": 2529 }, { "epoch": 0.29, "learning_rate": 2.157555893714152e-07, "logits/chosen": -3.696570634841919, "logits/rejected": -3.5547797679901123, "logps/chosen": -220.66912841796875, "logps/rejected": -238.52651977539062, "loss": 0.8338, "rewards/accuracies": 0.625, "rewards/chosen": -0.8832507133483887, "rewards/margins": 0.30547815561294556, "rewards/rejected": -1.1887288093566895, "step": 2530 }, { "epoch": 0.29, "learning_rate": 2.1572047290179095e-07, "logits/chosen": -3.44429349899292, "logits/rejected": -3.0229990482330322, "logps/chosen": -329.0097351074219, "logps/rejected": -322.86669921875, "loss": 0.5423, "rewards/accuracies": 0.5, "rewards/chosen": -0.8805943727493286, "rewards/margins": 2.0709004402160645, "rewards/rejected": -2.9514946937561035, "step": 2531 }, { "epoch": 0.29, "learning_rate": 2.1568535643216667e-07, "logits/chosen": -2.947079658508301, "logits/rejected": -2.8432843685150146, "logps/chosen": -369.27947998046875, "logps/rejected": -440.1089782714844, "loss": 0.1344, "rewards/accuracies": 1.0, "rewards/chosen": 0.4164121747016907, "rewards/margins": 2.5227394104003906, "rewards/rejected": -2.1063272953033447, "step": 2532 }, { "epoch": 0.29, "learning_rate": 2.1565023996254243e-07, "logits/chosen": -2.693087577819824, "logits/rejected": -2.7966902256011963, "logps/chosen": -246.074951171875, "logps/rejected": -409.5174255371094, "loss": 0.2052, "rewards/accuracies": 0.875, "rewards/chosen": -0.4320990741252899, "rewards/margins": 2.4942519664764404, "rewards/rejected": -2.9263510704040527, "step": 2533 }, { "epoch": 0.29, "learning_rate": 2.1561512349291818e-07, "logits/chosen": -3.289893388748169, "logits/rejected": -3.4131593704223633, "logps/chosen": -265.2598876953125, "logps/rejected": -296.5223388671875, "loss": 0.2348, "rewards/accuracies": 1.0, "rewards/chosen": 0.030001459643244743, "rewards/margins": 1.9808032512664795, "rewards/rejected": -1.9508018493652344, "step": 2534 }, { "epoch": 0.29, "learning_rate": 2.155800070232939e-07, "logits/chosen": -3.509627103805542, "logits/rejected": -3.2508726119995117, "logps/chosen": -332.2469482421875, "logps/rejected": -252.56614685058594, "loss": 1.4328, "rewards/accuracies": 0.5, "rewards/chosen": -0.513911783695221, "rewards/margins": 0.11125612258911133, "rewards/rejected": -0.625167965888977, "step": 2535 }, { "epoch": 0.29, "learning_rate": 2.1554489055366966e-07, "logits/chosen": -3.171164035797119, "logits/rejected": -3.180454730987549, "logps/chosen": -233.70899963378906, "logps/rejected": -226.4114532470703, "loss": 0.8238, "rewards/accuracies": 0.625, "rewards/chosen": -0.6192416548728943, "rewards/margins": 0.14890477061271667, "rewards/rejected": -0.7681463956832886, "step": 2536 }, { "epoch": 0.29, "learning_rate": 2.155097740840454e-07, "logits/chosen": -3.039989709854126, "logits/rejected": -2.9134738445281982, "logps/chosen": -383.3440856933594, "logps/rejected": -268.11175537109375, "loss": 0.4467, "rewards/accuracies": 0.875, "rewards/chosen": 0.14948640763759613, "rewards/margins": 1.0681140422821045, "rewards/rejected": -0.9186275005340576, "step": 2537 }, { "epoch": 0.29, "learning_rate": 2.1547465761442115e-07, "logits/chosen": -2.9930341243743896, "logits/rejected": -3.3929364681243896, "logps/chosen": -208.77340698242188, "logps/rejected": -223.36050415039062, "loss": 0.3317, "rewards/accuracies": 0.875, "rewards/chosen": 0.050011053681373596, "rewards/margins": 1.6575568914413452, "rewards/rejected": -1.6075458526611328, "step": 2538 }, { "epoch": 0.29, "learning_rate": 2.154395411447969e-07, "logits/chosen": -2.3850817680358887, "logits/rejected": -2.4263038635253906, "logps/chosen": -207.4864959716797, "logps/rejected": -342.36761474609375, "loss": 0.7779, "rewards/accuracies": 0.875, "rewards/chosen": 0.11761902272701263, "rewards/margins": 0.9998621940612793, "rewards/rejected": -0.8822430968284607, "step": 2539 }, { "epoch": 0.29, "learning_rate": 2.1540442467517263e-07, "logits/chosen": -3.1116700172424316, "logits/rejected": -3.1031975746154785, "logps/chosen": -450.1258239746094, "logps/rejected": -204.19207763671875, "loss": 0.5807, "rewards/accuracies": 0.625, "rewards/chosen": 0.26815319061279297, "rewards/margins": 0.8769459128379822, "rewards/rejected": -0.6087927222251892, "step": 2540 }, { "epoch": 0.29, "learning_rate": 2.153693082055484e-07, "logits/chosen": -2.4168190956115723, "logits/rejected": -2.45223069190979, "logps/chosen": -480.0483093261719, "logps/rejected": -409.9466552734375, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": 0.705863356590271, "rewards/margins": 2.023974895477295, "rewards/rejected": -1.3181114196777344, "step": 2541 }, { "epoch": 0.29, "learning_rate": 2.1533419173592416e-07, "logits/chosen": -3.2713074684143066, "logits/rejected": -3.0394349098205566, "logps/chosen": -320.599853515625, "logps/rejected": -235.3441162109375, "loss": 0.3978, "rewards/accuracies": 0.875, "rewards/chosen": -0.3097507953643799, "rewards/margins": 0.9010695219039917, "rewards/rejected": -1.2108204364776611, "step": 2542 }, { "epoch": 0.29, "learning_rate": 2.152990752662999e-07, "logits/chosen": -3.918215036392212, "logits/rejected": -3.584228754043579, "logps/chosen": -205.4032440185547, "logps/rejected": -176.38973999023438, "loss": 0.5921, "rewards/accuracies": 0.625, "rewards/chosen": -0.47760850191116333, "rewards/margins": 1.3396594524383545, "rewards/rejected": -1.8172677755355835, "step": 2543 }, { "epoch": 0.29, "learning_rate": 2.1526395879667564e-07, "logits/chosen": -3.277860641479492, "logits/rejected": -3.349273681640625, "logps/chosen": -299.9290466308594, "logps/rejected": -234.55770874023438, "loss": 0.2794, "rewards/accuracies": 1.0, "rewards/chosen": 0.28574448823928833, "rewards/margins": 2.639697313308716, "rewards/rejected": -2.3539528846740723, "step": 2544 }, { "epoch": 0.29, "learning_rate": 2.1522884232705137e-07, "logits/chosen": -3.2285404205322266, "logits/rejected": -3.213987350463867, "logps/chosen": -262.91851806640625, "logps/rejected": -211.62171936035156, "loss": 0.5756, "rewards/accuracies": 0.875, "rewards/chosen": -0.03728321194648743, "rewards/margins": 0.7637776136398315, "rewards/rejected": -0.8010609149932861, "step": 2545 }, { "epoch": 0.29, "learning_rate": 2.1519372585742712e-07, "logits/chosen": -3.1560704708099365, "logits/rejected": -2.6517655849456787, "logps/chosen": -351.4471435546875, "logps/rejected": -331.6099853515625, "loss": 0.7588, "rewards/accuracies": 0.75, "rewards/chosen": -0.11821253597736359, "rewards/margins": 1.321184754371643, "rewards/rejected": -1.4393974542617798, "step": 2546 }, { "epoch": 0.29, "learning_rate": 2.1515860938780288e-07, "logits/chosen": -2.8120527267456055, "logits/rejected": -2.7895634174346924, "logps/chosen": -521.9832763671875, "logps/rejected": -383.49774169921875, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.25195562839508057, "rewards/margins": 0.43304774165153503, "rewards/rejected": -0.685003399848938, "step": 2547 }, { "epoch": 0.29, "learning_rate": 2.151234929181786e-07, "logits/chosen": -2.7360854148864746, "logits/rejected": -2.5680344104766846, "logps/chosen": -370.57940673828125, "logps/rejected": -304.3961486816406, "loss": 0.2929, "rewards/accuracies": 1.0, "rewards/chosen": -0.1524173766374588, "rewards/margins": 1.5483709573745728, "rewards/rejected": -1.7007882595062256, "step": 2548 }, { "epoch": 0.29, "learning_rate": 2.1508837644855436e-07, "logits/chosen": -3.075923442840576, "logits/rejected": -3.179980754852295, "logps/chosen": -142.56776428222656, "logps/rejected": -177.65765380859375, "loss": 0.2434, "rewards/accuracies": 1.0, "rewards/chosen": 0.2216678261756897, "rewards/margins": 1.9188214540481567, "rewards/rejected": -1.6971535682678223, "step": 2549 }, { "epoch": 0.29, "learning_rate": 2.150532599789301e-07, "logits/chosen": -3.268261432647705, "logits/rejected": -3.513883113861084, "logps/chosen": -170.75173950195312, "logps/rejected": -190.55337524414062, "loss": 0.6115, "rewards/accuracies": 0.625, "rewards/chosen": -0.31909674406051636, "rewards/margins": 0.8725062608718872, "rewards/rejected": -1.1916028261184692, "step": 2550 }, { "epoch": 0.29, "learning_rate": 2.1501814350930584e-07, "logits/chosen": -2.614130973815918, "logits/rejected": -2.864084482192993, "logps/chosen": -337.18707275390625, "logps/rejected": -504.7647399902344, "loss": 0.2505, "rewards/accuracies": 0.875, "rewards/chosen": 0.526158332824707, "rewards/margins": 2.322004795074463, "rewards/rejected": -1.7958464622497559, "step": 2551 }, { "epoch": 0.29, "learning_rate": 2.1498302703968162e-07, "logits/chosen": -2.758056163787842, "logits/rejected": -2.6023311614990234, "logps/chosen": -158.41049194335938, "logps/rejected": -161.56605529785156, "loss": 0.6111, "rewards/accuracies": 0.625, "rewards/chosen": -0.355548232793808, "rewards/margins": 0.38794904947280884, "rewards/rejected": -0.7434972524642944, "step": 2552 }, { "epoch": 0.29, "learning_rate": 2.1494791057005732e-07, "logits/chosen": -2.4254207611083984, "logits/rejected": -2.475022077560425, "logps/chosen": -252.745361328125, "logps/rejected": -207.0758514404297, "loss": 0.3084, "rewards/accuracies": 0.875, "rewards/chosen": 0.6883941888809204, "rewards/margins": 2.0777859687805176, "rewards/rejected": -1.3893918991088867, "step": 2553 }, { "epoch": 0.29, "learning_rate": 2.149127941004331e-07, "logits/chosen": -2.533390522003174, "logits/rejected": -2.7753233909606934, "logps/chosen": -231.60511779785156, "logps/rejected": -182.68295288085938, "loss": 0.5893, "rewards/accuracies": 0.5, "rewards/chosen": -0.045577578246593475, "rewards/margins": 0.7031983733177185, "rewards/rejected": -0.7487759590148926, "step": 2554 }, { "epoch": 0.29, "learning_rate": 2.1487767763080885e-07, "logits/chosen": -3.790057420730591, "logits/rejected": -3.7323083877563477, "logps/chosen": -226.44937133789062, "logps/rejected": -225.21630859375, "loss": 0.3676, "rewards/accuracies": 1.0, "rewards/chosen": 0.25751665234565735, "rewards/margins": 1.1721609830856323, "rewards/rejected": -0.9146443009376526, "step": 2555 }, { "epoch": 0.29, "learning_rate": 2.1484256116118458e-07, "logits/chosen": -2.9925804138183594, "logits/rejected": -3.332345485687256, "logps/chosen": -247.73797607421875, "logps/rejected": -314.68743896484375, "loss": 0.3908, "rewards/accuracies": 0.75, "rewards/chosen": 0.30423909425735474, "rewards/margins": 1.9378318786621094, "rewards/rejected": -1.6335928440093994, "step": 2556 }, { "epoch": 0.29, "learning_rate": 2.1480744469156034e-07, "logits/chosen": -2.9305856227874756, "logits/rejected": -3.015263557434082, "logps/chosen": -327.0535583496094, "logps/rejected": -324.77947998046875, "loss": 0.1972, "rewards/accuracies": 0.875, "rewards/chosen": 0.8379375338554382, "rewards/margins": 3.107719659805298, "rewards/rejected": -2.269782066345215, "step": 2557 }, { "epoch": 0.29, "learning_rate": 2.147723282219361e-07, "logits/chosen": -2.9105234146118164, "logits/rejected": -2.9280948638916016, "logps/chosen": -111.18600463867188, "logps/rejected": -244.12619018554688, "loss": 0.3171, "rewards/accuracies": 0.875, "rewards/chosen": 0.1900024116039276, "rewards/margins": 2.0559754371643066, "rewards/rejected": -1.8659732341766357, "step": 2558 }, { "epoch": 0.3, "learning_rate": 2.1473721175231182e-07, "logits/chosen": -2.8897509574890137, "logits/rejected": -2.5914406776428223, "logps/chosen": -227.49989318847656, "logps/rejected": -154.2394561767578, "loss": 0.534, "rewards/accuracies": 0.625, "rewards/chosen": -0.501918375492096, "rewards/margins": 0.9221572875976562, "rewards/rejected": -1.4240756034851074, "step": 2559 }, { "epoch": 0.3, "learning_rate": 2.1470209528268757e-07, "logits/chosen": -3.153500556945801, "logits/rejected": -3.268545150756836, "logps/chosen": -436.768310546875, "logps/rejected": -219.27935791015625, "loss": 0.7682, "rewards/accuracies": 0.875, "rewards/chosen": -0.6188145875930786, "rewards/margins": 0.7104479670524597, "rewards/rejected": -1.3292624950408936, "step": 2560 }, { "epoch": 0.3, "learning_rate": 2.146669788130633e-07, "logits/chosen": -3.2748684883117676, "logits/rejected": -3.43843150138855, "logps/chosen": -220.39195251464844, "logps/rejected": -224.68157958984375, "loss": 0.5758, "rewards/accuracies": 0.625, "rewards/chosen": -0.018315039575099945, "rewards/margins": 0.841328501701355, "rewards/rejected": -0.8596435785293579, "step": 2561 }, { "epoch": 0.3, "learning_rate": 2.1463186234343905e-07, "logits/chosen": -3.733929395675659, "logits/rejected": -3.668494701385498, "logps/chosen": -289.71087646484375, "logps/rejected": -191.19894409179688, "loss": 0.2235, "rewards/accuracies": 1.0, "rewards/chosen": 0.12815839052200317, "rewards/margins": 2.065446615219116, "rewards/rejected": -1.9372881650924683, "step": 2562 }, { "epoch": 0.3, "learning_rate": 2.1459674587381483e-07, "logits/chosen": -2.9200353622436523, "logits/rejected": -3.256335735321045, "logps/chosen": -132.3724822998047, "logps/rejected": -162.9464569091797, "loss": 0.3497, "rewards/accuracies": 0.875, "rewards/chosen": 0.36982202529907227, "rewards/margins": 2.162919759750366, "rewards/rejected": -1.793097734451294, "step": 2563 }, { "epoch": 0.3, "learning_rate": 2.1456162940419056e-07, "logits/chosen": -2.884148120880127, "logits/rejected": -2.9516119956970215, "logps/chosen": -181.06773376464844, "logps/rejected": -215.36285400390625, "loss": 0.5618, "rewards/accuracies": 0.75, "rewards/chosen": -0.8933176398277283, "rewards/margins": 1.1003203392028809, "rewards/rejected": -1.993638038635254, "step": 2564 }, { "epoch": 0.3, "learning_rate": 2.1452651293456631e-07, "logits/chosen": -2.8888416290283203, "logits/rejected": -2.7180867195129395, "logps/chosen": -242.84286499023438, "logps/rejected": -236.66026306152344, "loss": 0.35, "rewards/accuracies": 0.75, "rewards/chosen": 0.13380342721939087, "rewards/margins": 1.6444716453552246, "rewards/rejected": -1.5106680393218994, "step": 2565 }, { "epoch": 0.3, "learning_rate": 2.1449139646494204e-07, "logits/chosen": -2.8170509338378906, "logits/rejected": -2.6424622535705566, "logps/chosen": -245.99082946777344, "logps/rejected": -167.81605529785156, "loss": 0.739, "rewards/accuracies": 0.5, "rewards/chosen": -0.394553542137146, "rewards/margins": 0.5255584716796875, "rewards/rejected": -0.9201120138168335, "step": 2566 }, { "epoch": 0.3, "learning_rate": 2.144562799953178e-07, "logits/chosen": -2.9923300743103027, "logits/rejected": -3.051290512084961, "logps/chosen": -250.1763916015625, "logps/rejected": -224.6210479736328, "loss": 0.5478, "rewards/accuracies": 0.75, "rewards/chosen": 0.24610325694084167, "rewards/margins": 1.632594108581543, "rewards/rejected": -1.386490821838379, "step": 2567 }, { "epoch": 0.3, "learning_rate": 2.1442116352569355e-07, "logits/chosen": -3.3210184574127197, "logits/rejected": -3.7519757747650146, "logps/chosen": -245.64862060546875, "logps/rejected": -358.77203369140625, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": 0.34369543194770813, "rewards/margins": 2.6013083457946777, "rewards/rejected": -2.257612943649292, "step": 2568 }, { "epoch": 0.3, "learning_rate": 2.1438604705606928e-07, "logits/chosen": -3.4209508895874023, "logits/rejected": -3.5462265014648438, "logps/chosen": -248.39483642578125, "logps/rejected": -252.47625732421875, "loss": 0.4297, "rewards/accuracies": 0.625, "rewards/chosen": -0.6377914547920227, "rewards/margins": 2.059689998626709, "rewards/rejected": -2.697481632232666, "step": 2569 }, { "epoch": 0.3, "learning_rate": 2.1435093058644503e-07, "logits/chosen": -2.7979516983032227, "logits/rejected": -3.2873551845550537, "logps/chosen": -369.0631408691406, "logps/rejected": -342.6510009765625, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": 0.07598162442445755, "rewards/margins": 2.6163907051086426, "rewards/rejected": -2.5404090881347656, "step": 2570 }, { "epoch": 0.3, "learning_rate": 2.1431581411682078e-07, "logits/chosen": -3.0711984634399414, "logits/rejected": -3.2525575160980225, "logps/chosen": -269.4237365722656, "logps/rejected": -182.80599975585938, "loss": 0.7902, "rewards/accuracies": 0.5, "rewards/chosen": -1.0086373090744019, "rewards/margins": 1.2930245399475098, "rewards/rejected": -2.301661729812622, "step": 2571 }, { "epoch": 0.3, "learning_rate": 2.142806976471965e-07, "logits/chosen": -3.042958974838257, "logits/rejected": -2.8398070335388184, "logps/chosen": -333.62127685546875, "logps/rejected": -205.80789184570312, "loss": 0.5471, "rewards/accuracies": 0.625, "rewards/chosen": 0.16432519257068634, "rewards/margins": 0.8288993835449219, "rewards/rejected": -0.664574146270752, "step": 2572 }, { "epoch": 0.3, "learning_rate": 2.1424558117757227e-07, "logits/chosen": -1.8645180463790894, "logits/rejected": -1.9161763191223145, "logps/chosen": -320.7381896972656, "logps/rejected": -254.15463256835938, "loss": 0.4689, "rewards/accuracies": 0.75, "rewards/chosen": -0.23828072845935822, "rewards/margins": 1.511570930480957, "rewards/rejected": -1.7498517036437988, "step": 2573 }, { "epoch": 0.3, "learning_rate": 2.14210464707948e-07, "logits/chosen": -3.6634280681610107, "logits/rejected": -3.960866928100586, "logps/chosen": -191.92433166503906, "logps/rejected": -228.18504333496094, "loss": 0.5604, "rewards/accuracies": 0.625, "rewards/chosen": 0.14040455222129822, "rewards/margins": 1.443699598312378, "rewards/rejected": -1.3032950162887573, "step": 2574 }, { "epoch": 0.3, "learning_rate": 2.1417534823832377e-07, "logits/chosen": -2.7769393920898438, "logits/rejected": -2.597374200820923, "logps/chosen": -354.5603332519531, "logps/rejected": -339.4692687988281, "loss": 0.2693, "rewards/accuracies": 0.875, "rewards/chosen": 0.4016771614551544, "rewards/margins": 1.757029414176941, "rewards/rejected": -1.3553521633148193, "step": 2575 }, { "epoch": 0.3, "learning_rate": 2.1414023176869953e-07, "logits/chosen": -2.9424214363098145, "logits/rejected": -2.755504608154297, "logps/chosen": -229.32029724121094, "logps/rejected": -234.5140380859375, "loss": 0.9373, "rewards/accuracies": 0.625, "rewards/chosen": -0.5314222574234009, "rewards/margins": 0.7831764817237854, "rewards/rejected": -1.3145986795425415, "step": 2576 }, { "epoch": 0.3, "learning_rate": 2.1410511529907525e-07, "logits/chosen": -3.3437247276306152, "logits/rejected": -3.464768409729004, "logps/chosen": -356.91973876953125, "logps/rejected": -299.2821044921875, "loss": 0.2691, "rewards/accuracies": 0.875, "rewards/chosen": 0.5267232060432434, "rewards/margins": 1.897353172302246, "rewards/rejected": -1.3706300258636475, "step": 2577 }, { "epoch": 0.3, "learning_rate": 2.14069998829451e-07, "logits/chosen": -2.6547083854675293, "logits/rejected": -2.710179567337036, "logps/chosen": -394.1455383300781, "logps/rejected": -322.2351379394531, "loss": 0.2403, "rewards/accuracies": 0.875, "rewards/chosen": 0.2676967680454254, "rewards/margins": 3.0415279865264893, "rewards/rejected": -2.7738311290740967, "step": 2578 }, { "epoch": 0.3, "learning_rate": 2.1403488235982676e-07, "logits/chosen": -3.219705104827881, "logits/rejected": -3.228687286376953, "logps/chosen": -238.00192260742188, "logps/rejected": -455.2807312011719, "loss": 0.3909, "rewards/accuracies": 0.875, "rewards/chosen": -0.32765665650367737, "rewards/margins": 1.227205514907837, "rewards/rejected": -1.5548622608184814, "step": 2579 }, { "epoch": 0.3, "learning_rate": 2.139997658902025e-07, "logits/chosen": -3.0565598011016846, "logits/rejected": -3.000286102294922, "logps/chosen": -234.52352905273438, "logps/rejected": -298.09271240234375, "loss": 0.3102, "rewards/accuracies": 0.875, "rewards/chosen": 0.16635188460350037, "rewards/margins": 1.56252121925354, "rewards/rejected": -1.3961693048477173, "step": 2580 }, { "epoch": 0.3, "learning_rate": 2.1396464942057824e-07, "logits/chosen": -2.6357057094573975, "logits/rejected": -2.665994882583618, "logps/chosen": -280.0640563964844, "logps/rejected": -317.63079833984375, "loss": 0.3203, "rewards/accuracies": 1.0, "rewards/chosen": -0.2933403253555298, "rewards/margins": 1.5820248126983643, "rewards/rejected": -1.8753650188446045, "step": 2581 }, { "epoch": 0.3, "learning_rate": 2.1392953295095397e-07, "logits/chosen": -2.7188618183135986, "logits/rejected": -2.8137030601501465, "logps/chosen": -200.86907958984375, "logps/rejected": -293.8908386230469, "loss": 0.3079, "rewards/accuracies": 1.0, "rewards/chosen": -0.12379367649555206, "rewards/margins": 1.5092010498046875, "rewards/rejected": -1.6329946517944336, "step": 2582 }, { "epoch": 0.3, "learning_rate": 2.1389441648132972e-07, "logits/chosen": -3.7103869915008545, "logits/rejected": -3.593686819076538, "logps/chosen": -253.62274169921875, "logps/rejected": -235.7546844482422, "loss": 0.3558, "rewards/accuracies": 0.75, "rewards/chosen": 0.19297929108142853, "rewards/margins": 1.938727617263794, "rewards/rejected": -1.7457482814788818, "step": 2583 }, { "epoch": 0.3, "learning_rate": 2.1385930001170548e-07, "logits/chosen": -3.245603322982788, "logits/rejected": -3.305307626724243, "logps/chosen": -231.2584228515625, "logps/rejected": -229.70326232910156, "loss": 0.3879, "rewards/accuracies": 0.875, "rewards/chosen": -0.3055317997932434, "rewards/margins": 0.9713259935379028, "rewards/rejected": -1.2768577337265015, "step": 2584 }, { "epoch": 0.3, "learning_rate": 2.138241835420812e-07, "logits/chosen": -2.4296247959136963, "logits/rejected": -2.5137107372283936, "logps/chosen": -269.891845703125, "logps/rejected": -194.33457946777344, "loss": 0.4433, "rewards/accuracies": 0.75, "rewards/chosen": 0.09491641819477081, "rewards/margins": 0.7702800035476685, "rewards/rejected": -0.6753636002540588, "step": 2585 }, { "epoch": 0.3, "learning_rate": 2.1378906707245699e-07, "logits/chosen": -2.8271713256835938, "logits/rejected": -3.096623420715332, "logps/chosen": -280.2093505859375, "logps/rejected": -209.69635009765625, "loss": 0.1166, "rewards/accuracies": 1.0, "rewards/chosen": 0.2109682410955429, "rewards/margins": 2.855173110961914, "rewards/rejected": -2.64420485496521, "step": 2586 }, { "epoch": 0.3, "learning_rate": 2.1375395060283274e-07, "logits/chosen": -3.998806953430176, "logits/rejected": -3.7143378257751465, "logps/chosen": -269.6476745605469, "logps/rejected": -192.35833740234375, "loss": 0.2462, "rewards/accuracies": 1.0, "rewards/chosen": 0.5411118268966675, "rewards/margins": 2.1918482780456543, "rewards/rejected": -1.6507364511489868, "step": 2587 }, { "epoch": 0.3, "learning_rate": 2.1371883413320847e-07, "logits/chosen": -3.212614059448242, "logits/rejected": -3.266993522644043, "logps/chosen": -341.0892333984375, "logps/rejected": -375.3945617675781, "loss": 0.6666, "rewards/accuracies": 0.75, "rewards/chosen": -0.9270532131195068, "rewards/margins": 1.1798666715621948, "rewards/rejected": -2.106920003890991, "step": 2588 }, { "epoch": 0.3, "learning_rate": 2.1368371766358422e-07, "logits/chosen": -3.088294506072998, "logits/rejected": -3.2397639751434326, "logps/chosen": -201.5574493408203, "logps/rejected": -195.56849670410156, "loss": 0.3392, "rewards/accuracies": 0.875, "rewards/chosen": 0.22191745042800903, "rewards/margins": 1.3379775285720825, "rewards/rejected": -1.1160600185394287, "step": 2589 }, { "epoch": 0.3, "learning_rate": 2.1364860119395995e-07, "logits/chosen": -3.3521690368652344, "logits/rejected": -3.457568645477295, "logps/chosen": -227.6110076904297, "logps/rejected": -270.6137390136719, "loss": 0.2606, "rewards/accuracies": 0.875, "rewards/chosen": 0.3614324629306793, "rewards/margins": 3.0437488555908203, "rewards/rejected": -2.682316541671753, "step": 2590 }, { "epoch": 0.3, "learning_rate": 2.136134847243357e-07, "logits/chosen": -3.3407042026519775, "logits/rejected": -3.23598313331604, "logps/chosen": -160.33721923828125, "logps/rejected": -194.5361785888672, "loss": 0.2872, "rewards/accuracies": 0.75, "rewards/chosen": 0.9667548537254333, "rewards/margins": 2.1675119400024414, "rewards/rejected": -1.2007571458816528, "step": 2591 }, { "epoch": 0.3, "learning_rate": 2.1357836825471146e-07, "logits/chosen": -2.746734857559204, "logits/rejected": -2.882901668548584, "logps/chosen": -349.1083984375, "logps/rejected": -295.4512634277344, "loss": 0.3753, "rewards/accuracies": 0.875, "rewards/chosen": 0.31298744678497314, "rewards/margins": 1.3062620162963867, "rewards/rejected": -0.9932745695114136, "step": 2592 }, { "epoch": 0.3, "learning_rate": 2.1354325178508718e-07, "logits/chosen": -3.6634044647216797, "logits/rejected": -3.5774505138397217, "logps/chosen": -103.76362609863281, "logps/rejected": -68.02978515625, "loss": 0.5787, "rewards/accuracies": 0.75, "rewards/chosen": -0.7039738297462463, "rewards/margins": 0.3043333888053894, "rewards/rejected": -1.0083072185516357, "step": 2593 }, { "epoch": 0.3, "learning_rate": 2.1350813531546294e-07, "logits/chosen": -3.461884021759033, "logits/rejected": -3.379714012145996, "logps/chosen": -181.22329711914062, "logps/rejected": -202.13223266601562, "loss": 0.4143, "rewards/accuracies": 0.75, "rewards/chosen": -0.6452523469924927, "rewards/margins": 1.5031282901763916, "rewards/rejected": -2.148380756378174, "step": 2594 }, { "epoch": 0.3, "learning_rate": 2.134730188458387e-07, "logits/chosen": -3.6769375801086426, "logits/rejected": -3.6051723957061768, "logps/chosen": -231.912109375, "logps/rejected": -213.67449951171875, "loss": 0.3504, "rewards/accuracies": 0.875, "rewards/chosen": 0.16769975423812866, "rewards/margins": 2.318512439727783, "rewards/rejected": -2.1508126258850098, "step": 2595 }, { "epoch": 0.3, "learning_rate": 2.1343790237621442e-07, "logits/chosen": -3.837965488433838, "logits/rejected": -3.6957931518554688, "logps/chosen": -233.676513671875, "logps/rejected": -243.48460388183594, "loss": 0.3155, "rewards/accuracies": 0.875, "rewards/chosen": -0.31379270553588867, "rewards/margins": 1.7074849605560303, "rewards/rejected": -2.021277666091919, "step": 2596 }, { "epoch": 0.3, "learning_rate": 2.134027859065902e-07, "logits/chosen": -3.7925162315368652, "logits/rejected": -3.667675495147705, "logps/chosen": -186.47421264648438, "logps/rejected": -257.84417724609375, "loss": 0.2493, "rewards/accuracies": 1.0, "rewards/chosen": 0.24779394268989563, "rewards/margins": 2.5216047763824463, "rewards/rejected": -2.273810863494873, "step": 2597 }, { "epoch": 0.3, "learning_rate": 2.1336766943696593e-07, "logits/chosen": -3.8695459365844727, "logits/rejected": -3.687941789627075, "logps/chosen": -194.97474670410156, "logps/rejected": -273.35821533203125, "loss": 0.5707, "rewards/accuracies": 0.75, "rewards/chosen": -0.31056395173072815, "rewards/margins": 1.2100341320037842, "rewards/rejected": -1.52059805393219, "step": 2598 }, { "epoch": 0.3, "learning_rate": 2.1333255296734168e-07, "logits/chosen": -3.2129340171813965, "logits/rejected": -3.2441821098327637, "logps/chosen": -240.37953186035156, "logps/rejected": -171.22369384765625, "loss": 0.5942, "rewards/accuracies": 0.875, "rewards/chosen": 0.24090737104415894, "rewards/margins": 1.164582371711731, "rewards/rejected": -0.9236750602722168, "step": 2599 }, { "epoch": 0.3, "learning_rate": 2.1329743649771743e-07, "logits/chosen": -2.86246919631958, "logits/rejected": -3.069891929626465, "logps/chosen": -169.1517333984375, "logps/rejected": -285.3898620605469, "loss": 0.2674, "rewards/accuracies": 0.875, "rewards/chosen": 0.13462701439857483, "rewards/margins": 3.2156643867492676, "rewards/rejected": -3.0810372829437256, "step": 2600 }, { "epoch": 0.3, "learning_rate": 2.1326232002809316e-07, "logits/chosen": -3.2694320678710938, "logits/rejected": -3.1203155517578125, "logps/chosen": -225.84344482421875, "logps/rejected": -182.9088134765625, "loss": 0.6947, "rewards/accuracies": 0.625, "rewards/chosen": -0.12488748878240585, "rewards/margins": 0.339462012052536, "rewards/rejected": -0.46434950828552246, "step": 2601 }, { "epoch": 0.3, "learning_rate": 2.1322720355846892e-07, "logits/chosen": -3.072448492050171, "logits/rejected": -2.9065980911254883, "logps/chosen": -371.125732421875, "logps/rejected": -233.52462768554688, "loss": 0.4332, "rewards/accuracies": 0.75, "rewards/chosen": 0.1140744686126709, "rewards/margins": 1.1411091089248657, "rewards/rejected": -1.0270345211029053, "step": 2602 }, { "epoch": 0.3, "learning_rate": 2.1319208708884467e-07, "logits/chosen": -2.8249688148498535, "logits/rejected": -3.252692699432373, "logps/chosen": -143.95408630371094, "logps/rejected": -528.7186889648438, "loss": 0.4371, "rewards/accuracies": 0.625, "rewards/chosen": 0.08502054214477539, "rewards/margins": 2.0863773822784424, "rewards/rejected": -2.001356840133667, "step": 2603 }, { "epoch": 0.3, "learning_rate": 2.131569706192204e-07, "logits/chosen": -3.0703587532043457, "logits/rejected": -2.903543472290039, "logps/chosen": -334.08245849609375, "logps/rejected": -219.74005126953125, "loss": 0.1952, "rewards/accuracies": 1.0, "rewards/chosen": 0.4916192293167114, "rewards/margins": 1.9880692958831787, "rewards/rejected": -1.4964500665664673, "step": 2604 }, { "epoch": 0.3, "learning_rate": 2.1312185414959615e-07, "logits/chosen": -2.659576416015625, "logits/rejected": -2.610520839691162, "logps/chosen": -219.05325317382812, "logps/rejected": -160.3195343017578, "loss": 0.3556, "rewards/accuracies": 0.875, "rewards/chosen": 0.08446864783763885, "rewards/margins": 1.3481814861297607, "rewards/rejected": -1.263712763786316, "step": 2605 }, { "epoch": 0.3, "learning_rate": 2.1308673767997188e-07, "logits/chosen": -3.3327317237854004, "logits/rejected": -3.3206560611724854, "logps/chosen": -227.22195434570312, "logps/rejected": -148.6520233154297, "loss": 0.4152, "rewards/accuracies": 0.75, "rewards/chosen": -0.027636319398880005, "rewards/margins": 1.0642647743225098, "rewards/rejected": -1.091901183128357, "step": 2606 }, { "epoch": 0.3, "learning_rate": 2.1305162121034763e-07, "logits/chosen": -3.3311967849731445, "logits/rejected": -3.4810538291931152, "logps/chosen": -345.34423828125, "logps/rejected": -275.7441101074219, "loss": 0.2028, "rewards/accuracies": 1.0, "rewards/chosen": 0.3735184967517853, "rewards/margins": 2.5044138431549072, "rewards/rejected": -2.1308953762054443, "step": 2607 }, { "epoch": 0.3, "learning_rate": 2.130165047407234e-07, "logits/chosen": -3.479562282562256, "logits/rejected": -3.3001370429992676, "logps/chosen": -222.87506103515625, "logps/rejected": -218.8445281982422, "loss": 0.4056, "rewards/accuracies": 0.625, "rewards/chosen": -0.07859775424003601, "rewards/margins": 1.285064697265625, "rewards/rejected": -1.3636624813079834, "step": 2608 }, { "epoch": 0.3, "learning_rate": 2.1298138827109914e-07, "logits/chosen": -2.7875185012817383, "logits/rejected": -2.8810875415802, "logps/chosen": -190.42747497558594, "logps/rejected": -375.72894287109375, "loss": 0.3292, "rewards/accuracies": 0.875, "rewards/chosen": -0.08984656631946564, "rewards/margins": 1.622300624847412, "rewards/rejected": -1.7121471166610718, "step": 2609 }, { "epoch": 0.3, "learning_rate": 2.129462718014749e-07, "logits/chosen": -2.667707681655884, "logits/rejected": -2.4137344360351562, "logps/chosen": -251.4737548828125, "logps/rejected": -222.12075805664062, "loss": 0.2649, "rewards/accuracies": 1.0, "rewards/chosen": -0.3819400370121002, "rewards/margins": 1.4335930347442627, "rewards/rejected": -1.8155330419540405, "step": 2610 }, { "epoch": 0.3, "learning_rate": 2.1291115533185065e-07, "logits/chosen": -2.9640867710113525, "logits/rejected": -2.9014406204223633, "logps/chosen": -341.06048583984375, "logps/rejected": -276.04901123046875, "loss": 0.3695, "rewards/accuracies": 0.75, "rewards/chosen": -0.027166053652763367, "rewards/margins": 1.784468173980713, "rewards/rejected": -1.8116341829299927, "step": 2611 }, { "epoch": 0.3, "learning_rate": 2.1287603886222637e-07, "logits/chosen": -4.178925037384033, "logits/rejected": -3.6387276649475098, "logps/chosen": -269.6713562011719, "logps/rejected": -149.16397094726562, "loss": 0.3383, "rewards/accuracies": 0.875, "rewards/chosen": 0.18593506515026093, "rewards/margins": 1.5617293119430542, "rewards/rejected": -1.3757941722869873, "step": 2612 }, { "epoch": 0.3, "learning_rate": 2.1284092239260213e-07, "logits/chosen": -2.8688013553619385, "logits/rejected": -2.8532214164733887, "logps/chosen": -415.79522705078125, "logps/rejected": -301.96832275390625, "loss": 1.1092, "rewards/accuracies": 0.375, "rewards/chosen": -1.008213996887207, "rewards/margins": 0.527400016784668, "rewards/rejected": -1.535614013671875, "step": 2613 }, { "epoch": 0.3, "learning_rate": 2.1280580592297786e-07, "logits/chosen": -2.6170856952667236, "logits/rejected": -2.904116153717041, "logps/chosen": -361.64971923828125, "logps/rejected": -264.6898498535156, "loss": 0.4149, "rewards/accuracies": 0.75, "rewards/chosen": -0.051516562700271606, "rewards/margins": 1.5424813032150269, "rewards/rejected": -1.5939979553222656, "step": 2614 }, { "epoch": 0.3, "learning_rate": 2.127706894533536e-07, "logits/chosen": -4.016365051269531, "logits/rejected": -3.613889694213867, "logps/chosen": -295.7431640625, "logps/rejected": -167.41946411132812, "loss": 0.3466, "rewards/accuracies": 0.875, "rewards/chosen": 0.20511853694915771, "rewards/margins": 1.9600541591644287, "rewards/rejected": -1.7549355030059814, "step": 2615 }, { "epoch": 0.3, "learning_rate": 2.1273557298372936e-07, "logits/chosen": -3.0946555137634277, "logits/rejected": -3.032836437225342, "logps/chosen": -306.7103271484375, "logps/rejected": -287.1866760253906, "loss": 0.5942, "rewards/accuracies": 0.625, "rewards/chosen": -0.5122720003128052, "rewards/margins": 1.0914312601089478, "rewards/rejected": -1.6037033796310425, "step": 2616 }, { "epoch": 0.3, "learning_rate": 2.127004565141051e-07, "logits/chosen": -3.9072630405426025, "logits/rejected": -3.914475917816162, "logps/chosen": -95.60518646240234, "logps/rejected": -121.07215881347656, "loss": 0.5527, "rewards/accuracies": 0.625, "rewards/chosen": -0.09743925929069519, "rewards/margins": 0.8458166122436523, "rewards/rejected": -0.9432559013366699, "step": 2617 }, { "epoch": 0.3, "learning_rate": 2.1266534004448084e-07, "logits/chosen": -2.8881773948669434, "logits/rejected": -2.747392416000366, "logps/chosen": -207.5545196533203, "logps/rejected": -231.87057495117188, "loss": 0.3731, "rewards/accuracies": 0.625, "rewards/chosen": -0.22826243937015533, "rewards/margins": 2.211359977722168, "rewards/rejected": -2.439622402191162, "step": 2618 }, { "epoch": 0.3, "learning_rate": 2.1263022357485657e-07, "logits/chosen": -2.5903801918029785, "logits/rejected": -2.3856489658355713, "logps/chosen": -539.1986083984375, "logps/rejected": -277.7161560058594, "loss": 0.3475, "rewards/accuracies": 0.875, "rewards/chosen": -0.10978647321462631, "rewards/margins": 2.167459011077881, "rewards/rejected": -2.27724552154541, "step": 2619 }, { "epoch": 0.3, "learning_rate": 2.1259510710523235e-07, "logits/chosen": -2.7712082862854004, "logits/rejected": -2.854727268218994, "logps/chosen": -286.43865966796875, "logps/rejected": -190.49935913085938, "loss": 1.236, "rewards/accuracies": 0.625, "rewards/chosen": -1.1826320886611938, "rewards/margins": 0.10737287998199463, "rewards/rejected": -1.2900049686431885, "step": 2620 }, { "epoch": 0.3, "learning_rate": 2.125599906356081e-07, "logits/chosen": -3.4522032737731934, "logits/rejected": -3.415865659713745, "logps/chosen": -84.9923324584961, "logps/rejected": -154.8427734375, "loss": 0.5065, "rewards/accuracies": 0.75, "rewards/chosen": 0.007892109453678131, "rewards/margins": 0.7958219647407532, "rewards/rejected": -0.787929892539978, "step": 2621 }, { "epoch": 0.3, "learning_rate": 2.1252487416598383e-07, "logits/chosen": -2.8758649826049805, "logits/rejected": -2.875692844390869, "logps/chosen": -201.611328125, "logps/rejected": -195.94918823242188, "loss": 0.522, "rewards/accuracies": 0.75, "rewards/chosen": -0.0891701877117157, "rewards/margins": 0.9865264892578125, "rewards/rejected": -1.0756967067718506, "step": 2622 }, { "epoch": 0.3, "learning_rate": 2.124897576963596e-07, "logits/chosen": -3.1700446605682373, "logits/rejected": -2.8612446784973145, "logps/chosen": -149.87327575683594, "logps/rejected": -246.7994384765625, "loss": 0.6418, "rewards/accuracies": 0.875, "rewards/chosen": -0.04152717813849449, "rewards/margins": 0.6933844089508057, "rewards/rejected": -0.7349116206169128, "step": 2623 }, { "epoch": 0.3, "learning_rate": 2.1245464122673534e-07, "logits/chosen": -2.560467004776001, "logits/rejected": -2.861370086669922, "logps/chosen": -234.38888549804688, "logps/rejected": -249.68020629882812, "loss": 0.3292, "rewards/accuracies": 1.0, "rewards/chosen": 0.295142263174057, "rewards/margins": 1.493542194366455, "rewards/rejected": -1.1983999013900757, "step": 2624 }, { "epoch": 0.3, "learning_rate": 2.1241952475711107e-07, "logits/chosen": -3.5084965229034424, "logits/rejected": -3.4222421646118164, "logps/chosen": -242.83255004882812, "logps/rejected": -213.56703186035156, "loss": 0.2488, "rewards/accuracies": 1.0, "rewards/chosen": 0.3930262327194214, "rewards/margins": 1.8862134218215942, "rewards/rejected": -1.4931873083114624, "step": 2625 }, { "epoch": 0.3, "learning_rate": 2.1238440828748682e-07, "logits/chosen": -2.7609899044036865, "logits/rejected": -2.445192813873291, "logps/chosen": -261.17474365234375, "logps/rejected": -216.25392150878906, "loss": 0.4826, "rewards/accuracies": 0.875, "rewards/chosen": -0.5984484553337097, "rewards/margins": 0.617791473865509, "rewards/rejected": -1.2162399291992188, "step": 2626 }, { "epoch": 0.3, "learning_rate": 2.1234929181786255e-07, "logits/chosen": -3.1811728477478027, "logits/rejected": -3.130852222442627, "logps/chosen": -364.94818115234375, "logps/rejected": -268.77203369140625, "loss": 0.2916, "rewards/accuracies": 0.875, "rewards/chosen": 0.2814651429653168, "rewards/margins": 1.9705853462219238, "rewards/rejected": -1.6891200542449951, "step": 2627 }, { "epoch": 0.3, "learning_rate": 2.123141753482383e-07, "logits/chosen": -2.4331860542297363, "logits/rejected": -2.4532604217529297, "logps/chosen": -286.90264892578125, "logps/rejected": -229.51002502441406, "loss": 0.2558, "rewards/accuracies": 1.0, "rewards/chosen": 0.6661115884780884, "rewards/margins": 1.7842493057250977, "rewards/rejected": -1.1181378364562988, "step": 2628 }, { "epoch": 0.3, "learning_rate": 2.1227905887861406e-07, "logits/chosen": -3.2210826873779297, "logits/rejected": -3.2571213245391846, "logps/chosen": -233.7112579345703, "logps/rejected": -191.5620574951172, "loss": 0.4412, "rewards/accuracies": 0.875, "rewards/chosen": -0.0010215938091278076, "rewards/margins": 1.6510913372039795, "rewards/rejected": -1.6521129608154297, "step": 2629 }, { "epoch": 0.3, "learning_rate": 2.1224394240898979e-07, "logits/chosen": -3.6039528846740723, "logits/rejected": -3.635333299636841, "logps/chosen": -201.91702270507812, "logps/rejected": -206.30783081054688, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": 0.35813838243484497, "rewards/margins": 2.510892629623413, "rewards/rejected": -2.152754306793213, "step": 2630 }, { "epoch": 0.3, "learning_rate": 2.1220882593936557e-07, "logits/chosen": -2.989579677581787, "logits/rejected": -2.7776458263397217, "logps/chosen": -391.1034240722656, "logps/rejected": -455.87969970703125, "loss": 0.5709, "rewards/accuracies": 0.625, "rewards/chosen": -0.36003822088241577, "rewards/margins": 0.9418476223945618, "rewards/rejected": -1.301885724067688, "step": 2631 }, { "epoch": 0.3, "learning_rate": 2.1217370946974132e-07, "logits/chosen": -3.085770845413208, "logits/rejected": -2.9609150886535645, "logps/chosen": -343.3042297363281, "logps/rejected": -253.92869567871094, "loss": 0.2088, "rewards/accuracies": 1.0, "rewards/chosen": -0.1349964290857315, "rewards/margins": 2.3497674465179443, "rewards/rejected": -2.4847636222839355, "step": 2632 }, { "epoch": 0.3, "learning_rate": 2.1213859300011705e-07, "logits/chosen": -3.0040388107299805, "logits/rejected": -3.138188362121582, "logps/chosen": -356.5177001953125, "logps/rejected": -225.9423370361328, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": 0.18886470794677734, "rewards/margins": 1.668954849243164, "rewards/rejected": -1.4800900220870972, "step": 2633 }, { "epoch": 0.3, "learning_rate": 2.121034765304928e-07, "logits/chosen": -3.32328462600708, "logits/rejected": -2.9965553283691406, "logps/chosen": -102.29368591308594, "logps/rejected": -192.2423095703125, "loss": 0.3921, "rewards/accuracies": 0.875, "rewards/chosen": -0.13508011400699615, "rewards/margins": 1.2319879531860352, "rewards/rejected": -1.3670681715011597, "step": 2634 }, { "epoch": 0.3, "learning_rate": 2.1206836006086853e-07, "logits/chosen": -3.0822315216064453, "logits/rejected": -3.226738452911377, "logps/chosen": -210.1138458251953, "logps/rejected": -296.59503173828125, "loss": 0.5019, "rewards/accuracies": 0.75, "rewards/chosen": 0.37011346220970154, "rewards/margins": 1.0705934762954712, "rewards/rejected": -0.700480043888092, "step": 2635 }, { "epoch": 0.3, "learning_rate": 2.1203324359124428e-07, "logits/chosen": -2.6529245376586914, "logits/rejected": -2.863008499145508, "logps/chosen": -273.86395263671875, "logps/rejected": -112.42547607421875, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": -0.4667474627494812, "rewards/margins": 0.6671906113624573, "rewards/rejected": -1.1339380741119385, "step": 2636 }, { "epoch": 0.3, "learning_rate": 2.1199812712162004e-07, "logits/chosen": -3.348909854888916, "logits/rejected": -3.4405832290649414, "logps/chosen": -245.3507537841797, "logps/rejected": -328.1563720703125, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": -0.31078895926475525, "rewards/margins": 2.1293139457702637, "rewards/rejected": -2.440103054046631, "step": 2637 }, { "epoch": 0.3, "learning_rate": 2.1196301065199576e-07, "logits/chosen": -2.7455739974975586, "logits/rejected": -2.6644973754882812, "logps/chosen": -148.25282287597656, "logps/rejected": -186.3067626953125, "loss": 0.5144, "rewards/accuracies": 0.625, "rewards/chosen": 0.22366288304328918, "rewards/margins": 0.8049038052558899, "rewards/rejected": -0.5812409520149231, "step": 2638 }, { "epoch": 0.3, "learning_rate": 2.1192789418237152e-07, "logits/chosen": -1.7965762615203857, "logits/rejected": -1.701200008392334, "logps/chosen": -346.4522705078125, "logps/rejected": -255.2733154296875, "loss": 0.5984, "rewards/accuracies": 0.625, "rewards/chosen": 0.11234212666749954, "rewards/margins": 0.9633269906044006, "rewards/rejected": -0.8509848117828369, "step": 2639 }, { "epoch": 0.3, "learning_rate": 2.1189277771274727e-07, "logits/chosen": -3.385784864425659, "logits/rejected": -3.2045576572418213, "logps/chosen": -312.9085998535156, "logps/rejected": -281.45745849609375, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": 0.27522900700569153, "rewards/margins": 2.7667925357818604, "rewards/rejected": -2.491563558578491, "step": 2640 }, { "epoch": 0.3, "learning_rate": 2.11857661243123e-07, "logits/chosen": -2.315624713897705, "logits/rejected": -2.173530340194702, "logps/chosen": -332.1817626953125, "logps/rejected": -288.607421875, "loss": 0.4544, "rewards/accuracies": 0.75, "rewards/chosen": 0.02453383058309555, "rewards/margins": 1.3337042331695557, "rewards/rejected": -1.3091704845428467, "step": 2641 }, { "epoch": 0.3, "learning_rate": 2.1182254477349878e-07, "logits/chosen": -2.7607760429382324, "logits/rejected": -2.784654140472412, "logps/chosen": -176.8272705078125, "logps/rejected": -262.67938232421875, "loss": 0.2918, "rewards/accuracies": 0.875, "rewards/chosen": 0.2575886845588684, "rewards/margins": 1.6384268999099731, "rewards/rejected": -1.380838394165039, "step": 2642 }, { "epoch": 0.3, "learning_rate": 2.117874283038745e-07, "logits/chosen": -3.110285758972168, "logits/rejected": -3.1426384449005127, "logps/chosen": -157.165283203125, "logps/rejected": -244.91180419921875, "loss": 0.4884, "rewards/accuracies": 0.625, "rewards/chosen": 0.07096487283706665, "rewards/margins": 1.6198042631149292, "rewards/rejected": -1.5488393306732178, "step": 2643 }, { "epoch": 0.3, "learning_rate": 2.1175231183425026e-07, "logits/chosen": -2.538911819458008, "logits/rejected": -2.7284297943115234, "logps/chosen": -152.67164611816406, "logps/rejected": -122.08198547363281, "loss": 0.5502, "rewards/accuracies": 0.75, "rewards/chosen": -0.9580534100532532, "rewards/margins": 1.063791036605835, "rewards/rejected": -2.0218443870544434, "step": 2644 }, { "epoch": 0.3, "learning_rate": 2.1171719536462601e-07, "logits/chosen": -2.8962833881378174, "logits/rejected": -3.2447714805603027, "logps/chosen": -232.14337158203125, "logps/rejected": -260.5798034667969, "loss": 0.3313, "rewards/accuracies": 0.75, "rewards/chosen": 0.2283482402563095, "rewards/margins": 2.361008882522583, "rewards/rejected": -2.132660388946533, "step": 2645 }, { "epoch": 0.31, "learning_rate": 2.1168207889500174e-07, "logits/chosen": -2.660217046737671, "logits/rejected": -2.7193503379821777, "logps/chosen": -367.6368408203125, "logps/rejected": -191.890869140625, "loss": 0.4567, "rewards/accuracies": 0.75, "rewards/chosen": 0.6010500192642212, "rewards/margins": 1.9173550605773926, "rewards/rejected": -1.3163049221038818, "step": 2646 }, { "epoch": 0.31, "learning_rate": 2.116469624253775e-07, "logits/chosen": -2.9813153743743896, "logits/rejected": -3.253883123397827, "logps/chosen": -499.7625427246094, "logps/rejected": -322.88751220703125, "loss": 0.6191, "rewards/accuracies": 0.875, "rewards/chosen": 0.021469667553901672, "rewards/margins": 0.857175350189209, "rewards/rejected": -0.8357056379318237, "step": 2647 }, { "epoch": 0.31, "learning_rate": 2.1161184595575325e-07, "logits/chosen": -2.829207420349121, "logits/rejected": -2.7558043003082275, "logps/chosen": -277.6553649902344, "logps/rejected": -334.64935302734375, "loss": 0.4406, "rewards/accuracies": 0.75, "rewards/chosen": 0.06831994652748108, "rewards/margins": 2.1513161659240723, "rewards/rejected": -2.082996129989624, "step": 2648 }, { "epoch": 0.31, "learning_rate": 2.1157672948612898e-07, "logits/chosen": -3.97103214263916, "logits/rejected": -3.8434014320373535, "logps/chosen": -100.18197631835938, "logps/rejected": -83.30469512939453, "loss": 0.4779, "rewards/accuracies": 0.75, "rewards/chosen": 0.3769177496433258, "rewards/margins": 0.8938929438591003, "rewards/rejected": -0.5169751644134521, "step": 2649 }, { "epoch": 0.31, "learning_rate": 2.1154161301650473e-07, "logits/chosen": -3.3420674800872803, "logits/rejected": -3.755899667739868, "logps/chosen": -275.7237243652344, "logps/rejected": -316.19622802734375, "loss": 0.3554, "rewards/accuracies": 0.875, "rewards/chosen": -0.23422250151634216, "rewards/margins": 2.5390076637268066, "rewards/rejected": -2.7732300758361816, "step": 2650 }, { "epoch": 0.31, "learning_rate": 2.1150649654688046e-07, "logits/chosen": -3.2168405055999756, "logits/rejected": -3.186497211456299, "logps/chosen": -249.2946014404297, "logps/rejected": -274.6036376953125, "loss": 0.6596, "rewards/accuracies": 0.375, "rewards/chosen": -0.6279729008674622, "rewards/margins": 0.36376339197158813, "rewards/rejected": -0.9917362928390503, "step": 2651 }, { "epoch": 0.31, "learning_rate": 2.114713800772562e-07, "logits/chosen": -3.032636880874634, "logits/rejected": -3.1316819190979004, "logps/chosen": -368.83294677734375, "logps/rejected": -294.6126708984375, "loss": 0.2222, "rewards/accuracies": 1.0, "rewards/chosen": 0.10298866778612137, "rewards/margins": 2.1913034915924072, "rewards/rejected": -2.088315010070801, "step": 2652 }, { "epoch": 0.31, "learning_rate": 2.11436263607632e-07, "logits/chosen": -2.524991512298584, "logits/rejected": -2.8310399055480957, "logps/chosen": -155.92784118652344, "logps/rejected": -240.8963623046875, "loss": 0.5906, "rewards/accuracies": 0.75, "rewards/chosen": -0.24226030707359314, "rewards/margins": 1.8797134160995483, "rewards/rejected": -2.121973752975464, "step": 2653 }, { "epoch": 0.31, "learning_rate": 2.1140114713800772e-07, "logits/chosen": -3.855231285095215, "logits/rejected": -3.8131346702575684, "logps/chosen": -201.9010467529297, "logps/rejected": -158.7369384765625, "loss": 0.5394, "rewards/accuracies": 0.75, "rewards/chosen": 0.3431272804737091, "rewards/margins": 1.4349701404571533, "rewards/rejected": -1.0918428897857666, "step": 2654 }, { "epoch": 0.31, "learning_rate": 2.1136603066838347e-07, "logits/chosen": -2.9801840782165527, "logits/rejected": -3.0134928226470947, "logps/chosen": -206.4908447265625, "logps/rejected": -212.1171112060547, "loss": 0.3557, "rewards/accuracies": 0.75, "rewards/chosen": 0.24139977991580963, "rewards/margins": 1.6338372230529785, "rewards/rejected": -1.39243745803833, "step": 2655 }, { "epoch": 0.31, "learning_rate": 2.1133091419875923e-07, "logits/chosen": -3.3574025630950928, "logits/rejected": -3.284484386444092, "logps/chosen": -355.9624328613281, "logps/rejected": -294.3044738769531, "loss": 0.2593, "rewards/accuracies": 1.0, "rewards/chosen": 0.051383137702941895, "rewards/margins": 2.213636875152588, "rewards/rejected": -2.1622536182403564, "step": 2656 }, { "epoch": 0.31, "learning_rate": 2.1129579772913495e-07, "logits/chosen": -2.746053695678711, "logits/rejected": -2.828361749649048, "logps/chosen": -256.38055419921875, "logps/rejected": -281.7858581542969, "loss": 0.1683, "rewards/accuracies": 1.0, "rewards/chosen": 0.8860880732536316, "rewards/margins": 3.565058469772339, "rewards/rejected": -2.6789703369140625, "step": 2657 }, { "epoch": 0.31, "learning_rate": 2.112606812595107e-07, "logits/chosen": -3.005098342895508, "logits/rejected": -2.9978652000427246, "logps/chosen": -260.446044921875, "logps/rejected": -244.42608642578125, "loss": 0.3022, "rewards/accuracies": 0.875, "rewards/chosen": 0.13928157091140747, "rewards/margins": 1.4200856685638428, "rewards/rejected": -1.28080415725708, "step": 2658 }, { "epoch": 0.31, "learning_rate": 2.1122556478988644e-07, "logits/chosen": -3.190901517868042, "logits/rejected": -2.9818334579467773, "logps/chosen": -306.5211486816406, "logps/rejected": -280.48028564453125, "loss": 0.3138, "rewards/accuracies": 0.875, "rewards/chosen": -0.07627400755882263, "rewards/margins": 2.1277923583984375, "rewards/rejected": -2.204066514968872, "step": 2659 }, { "epoch": 0.31, "learning_rate": 2.111904483202622e-07, "logits/chosen": -3.647137403488159, "logits/rejected": -3.631355047225952, "logps/chosen": -259.9970703125, "logps/rejected": -317.7362365722656, "loss": 0.6997, "rewards/accuracies": 0.75, "rewards/chosen": -0.4353281557559967, "rewards/margins": 1.0021271705627441, "rewards/rejected": -1.4374552965164185, "step": 2660 }, { "epoch": 0.31, "learning_rate": 2.1115533185063794e-07, "logits/chosen": -3.617340564727783, "logits/rejected": -4.030972957611084, "logps/chosen": -198.78048706054688, "logps/rejected": -331.6608581542969, "loss": 0.4359, "rewards/accuracies": 0.75, "rewards/chosen": -0.0650661289691925, "rewards/margins": 3.150432586669922, "rewards/rejected": -3.215498447418213, "step": 2661 }, { "epoch": 0.31, "learning_rate": 2.1112021538101367e-07, "logits/chosen": -2.8107078075408936, "logits/rejected": -2.870521068572998, "logps/chosen": -265.8528137207031, "logps/rejected": -264.05474853515625, "loss": 0.5018, "rewards/accuracies": 0.75, "rewards/chosen": 0.25905323028564453, "rewards/margins": 0.8668520450592041, "rewards/rejected": -0.6077988743782043, "step": 2662 }, { "epoch": 0.31, "learning_rate": 2.1108509891138942e-07, "logits/chosen": -3.0048599243164062, "logits/rejected": -3.1161508560180664, "logps/chosen": -132.58041381835938, "logps/rejected": -160.42922973632812, "loss": 0.376, "rewards/accuracies": 0.75, "rewards/chosen": -0.24053612351417542, "rewards/margins": 1.4679057598114014, "rewards/rejected": -1.7084418535232544, "step": 2663 }, { "epoch": 0.31, "learning_rate": 2.1104998244176515e-07, "logits/chosen": -2.939281702041626, "logits/rejected": -2.7568531036376953, "logps/chosen": -217.7142791748047, "logps/rejected": -264.04083251953125, "loss": 0.3856, "rewards/accuracies": 0.75, "rewards/chosen": 0.2692546844482422, "rewards/margins": 1.320460557937622, "rewards/rejected": -1.0512057542800903, "step": 2664 }, { "epoch": 0.31, "learning_rate": 2.1101486597214093e-07, "logits/chosen": -2.4977235794067383, "logits/rejected": -2.569164752960205, "logps/chosen": -307.8876037597656, "logps/rejected": -216.93710327148438, "loss": 0.5921, "rewards/accuracies": 0.875, "rewards/chosen": -0.00397057831287384, "rewards/margins": 1.6792693138122559, "rewards/rejected": -1.6832399368286133, "step": 2665 }, { "epoch": 0.31, "learning_rate": 2.1097974950251669e-07, "logits/chosen": -2.9322047233581543, "logits/rejected": -2.8850135803222656, "logps/chosen": -153.75851440429688, "logps/rejected": -199.24948120117188, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -0.1545974165201187, "rewards/margins": 0.6758066415786743, "rewards/rejected": -0.8304040431976318, "step": 2666 }, { "epoch": 0.31, "learning_rate": 2.109446330328924e-07, "logits/chosen": -2.508617877960205, "logits/rejected": -2.6317267417907715, "logps/chosen": -391.77093505859375, "logps/rejected": -262.9716491699219, "loss": 0.2904, "rewards/accuracies": 0.875, "rewards/chosen": 0.14566461741924286, "rewards/margins": 1.6080398559570312, "rewards/rejected": -1.462375283241272, "step": 2667 }, { "epoch": 0.31, "learning_rate": 2.1090951656326817e-07, "logits/chosen": -2.4445652961730957, "logits/rejected": -2.621253252029419, "logps/chosen": -383.74267578125, "logps/rejected": -253.97607421875, "loss": 0.2879, "rewards/accuracies": 1.0, "rewards/chosen": 0.47786152362823486, "rewards/margins": 2.01947021484375, "rewards/rejected": -1.5416086912155151, "step": 2668 }, { "epoch": 0.31, "learning_rate": 2.1087440009364392e-07, "logits/chosen": -2.8375632762908936, "logits/rejected": -2.8650431632995605, "logps/chosen": -395.013916015625, "logps/rejected": -225.9221649169922, "loss": 0.6505, "rewards/accuracies": 0.75, "rewards/chosen": -0.47935783863067627, "rewards/margins": 1.5428438186645508, "rewards/rejected": -2.0222015380859375, "step": 2669 }, { "epoch": 0.31, "learning_rate": 2.1083928362401965e-07, "logits/chosen": -3.415860176086426, "logits/rejected": -3.3173723220825195, "logps/chosen": -180.45108032226562, "logps/rejected": -195.68460083007812, "loss": 0.5787, "rewards/accuracies": 0.875, "rewards/chosen": -0.3478144109249115, "rewards/margins": 1.1470049619674683, "rewards/rejected": -1.4948192834854126, "step": 2670 }, { "epoch": 0.31, "learning_rate": 2.108041671543954e-07, "logits/chosen": -3.7328004837036133, "logits/rejected": -3.8640236854553223, "logps/chosen": -92.90747833251953, "logps/rejected": -189.60092163085938, "loss": 0.2147, "rewards/accuracies": 0.875, "rewards/chosen": 0.1304924190044403, "rewards/margins": 2.4503655433654785, "rewards/rejected": -2.319873332977295, "step": 2671 }, { "epoch": 0.31, "learning_rate": 2.1076905068477113e-07, "logits/chosen": -3.435408353805542, "logits/rejected": -3.4077329635620117, "logps/chosen": -103.98704528808594, "logps/rejected": -138.59429931640625, "loss": 0.375, "rewards/accuracies": 0.75, "rewards/chosen": 0.014223292469978333, "rewards/margins": 1.1510636806488037, "rewards/rejected": -1.1368404626846313, "step": 2672 }, { "epoch": 0.31, "learning_rate": 2.1073393421514688e-07, "logits/chosen": -3.4270856380462646, "logits/rejected": -3.6255593299865723, "logps/chosen": -99.92672729492188, "logps/rejected": -123.4222412109375, "loss": 0.48, "rewards/accuracies": 0.625, "rewards/chosen": -0.1662251055240631, "rewards/margins": 1.2893134355545044, "rewards/rejected": -1.4555386304855347, "step": 2673 }, { "epoch": 0.31, "learning_rate": 2.1069881774552264e-07, "logits/chosen": -3.5459532737731934, "logits/rejected": -3.3756797313690186, "logps/chosen": -281.7789306640625, "logps/rejected": -275.4245300292969, "loss": 0.7326, "rewards/accuracies": 0.75, "rewards/chosen": -1.160372257232666, "rewards/margins": 0.7282190322875977, "rewards/rejected": -1.8885912895202637, "step": 2674 }, { "epoch": 0.31, "learning_rate": 2.1066370127589836e-07, "logits/chosen": -3.8656203746795654, "logits/rejected": -3.727492332458496, "logps/chosen": -232.46302795410156, "logps/rejected": -183.06394958496094, "loss": 0.4531, "rewards/accuracies": 0.75, "rewards/chosen": -0.3305974006652832, "rewards/margins": 0.8914026618003845, "rewards/rejected": -1.2220001220703125, "step": 2675 }, { "epoch": 0.31, "learning_rate": 2.1062858480627414e-07, "logits/chosen": -3.6443703174591064, "logits/rejected": -3.1740593910217285, "logps/chosen": -300.6732177734375, "logps/rejected": -213.2513885498047, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": 0.48693275451660156, "rewards/margins": 2.5872771739959717, "rewards/rejected": -2.10034441947937, "step": 2676 }, { "epoch": 0.31, "learning_rate": 2.105934683366499e-07, "logits/chosen": -3.6283326148986816, "logits/rejected": -3.6519641876220703, "logps/chosen": -315.93853759765625, "logps/rejected": -424.2310791015625, "loss": 0.395, "rewards/accuracies": 0.875, "rewards/chosen": -0.3778654932975769, "rewards/margins": 1.5352487564086914, "rewards/rejected": -1.9131141901016235, "step": 2677 }, { "epoch": 0.31, "learning_rate": 2.1055835186702563e-07, "logits/chosen": -2.7422573566436768, "logits/rejected": -2.8363497257232666, "logps/chosen": -355.2220458984375, "logps/rejected": -262.51263427734375, "loss": 0.3191, "rewards/accuracies": 0.875, "rewards/chosen": 0.04066774249076843, "rewards/margins": 1.8454184532165527, "rewards/rejected": -1.804750680923462, "step": 2678 }, { "epoch": 0.31, "learning_rate": 2.1052323539740138e-07, "logits/chosen": -2.541379451751709, "logits/rejected": -2.456173896789551, "logps/chosen": -324.5509338378906, "logps/rejected": -393.8587341308594, "loss": 0.3133, "rewards/accuracies": 0.875, "rewards/chosen": 0.02267785370349884, "rewards/margins": 1.363777995109558, "rewards/rejected": -1.3411000967025757, "step": 2679 }, { "epoch": 0.31, "learning_rate": 2.104881189277771e-07, "logits/chosen": -3.055565357208252, "logits/rejected": -2.951294183731079, "logps/chosen": -152.54299926757812, "logps/rejected": -198.94424438476562, "loss": 0.1704, "rewards/accuracies": 1.0, "rewards/chosen": 0.2504234313964844, "rewards/margins": 3.040806293487549, "rewards/rejected": -2.7903826236724854, "step": 2680 }, { "epoch": 0.31, "learning_rate": 2.1045300245815286e-07, "logits/chosen": -2.892068386077881, "logits/rejected": -2.700523614883423, "logps/chosen": -202.7470703125, "logps/rejected": -188.39675903320312, "loss": 0.7224, "rewards/accuracies": 0.625, "rewards/chosen": -0.3664044141769409, "rewards/margins": 0.3669832944869995, "rewards/rejected": -0.7333877086639404, "step": 2681 }, { "epoch": 0.31, "learning_rate": 2.1041788598852861e-07, "logits/chosen": -2.5932185649871826, "logits/rejected": -2.5721709728240967, "logps/chosen": -244.40576171875, "logps/rejected": -265.55419921875, "loss": 0.4802, "rewards/accuracies": 0.625, "rewards/chosen": -0.8531277179718018, "rewards/margins": 0.8602733612060547, "rewards/rejected": -1.7134010791778564, "step": 2682 }, { "epoch": 0.31, "learning_rate": 2.1038276951890434e-07, "logits/chosen": -2.5868353843688965, "logits/rejected": -2.671602249145508, "logps/chosen": -199.66122436523438, "logps/rejected": -393.555419921875, "loss": 0.3582, "rewards/accuracies": 0.875, "rewards/chosen": 0.3236641585826874, "rewards/margins": 3.164863109588623, "rewards/rejected": -2.841198682785034, "step": 2683 }, { "epoch": 0.31, "learning_rate": 2.103476530492801e-07, "logits/chosen": -2.8019237518310547, "logits/rejected": -2.676260232925415, "logps/chosen": -161.70999145507812, "logps/rejected": -196.5008544921875, "loss": 0.494, "rewards/accuracies": 0.75, "rewards/chosen": -0.46975642442703247, "rewards/margins": 1.6134014129638672, "rewards/rejected": -2.083158016204834, "step": 2684 }, { "epoch": 0.31, "learning_rate": 2.1031253657965588e-07, "logits/chosen": -2.8804428577423096, "logits/rejected": -2.9575161933898926, "logps/chosen": -196.95571899414062, "logps/rejected": -206.85055541992188, "loss": 0.3039, "rewards/accuracies": 0.875, "rewards/chosen": 0.00013312697410583496, "rewards/margins": 1.4372384548187256, "rewards/rejected": -1.4371052980422974, "step": 2685 }, { "epoch": 0.31, "learning_rate": 2.1027742011003158e-07, "logits/chosen": -2.6479103565216064, "logits/rejected": -2.7668538093566895, "logps/chosen": -404.8021545410156, "logps/rejected": -376.9189453125, "loss": 0.5218, "rewards/accuracies": 0.75, "rewards/chosen": -0.28654834628105164, "rewards/margins": 2.0833852291107178, "rewards/rejected": -2.369933605194092, "step": 2686 }, { "epoch": 0.31, "learning_rate": 2.1024230364040736e-07, "logits/chosen": -2.721830368041992, "logits/rejected": -2.6887221336364746, "logps/chosen": -479.88433837890625, "logps/rejected": -330.96258544921875, "loss": 0.1944, "rewards/accuracies": 0.875, "rewards/chosen": 0.529760479927063, "rewards/margins": 2.816356658935547, "rewards/rejected": -2.2865958213806152, "step": 2687 }, { "epoch": 0.31, "learning_rate": 2.1020718717078309e-07, "logits/chosen": -3.017599582672119, "logits/rejected": -2.7422571182250977, "logps/chosen": -574.6094360351562, "logps/rejected": -324.6213684082031, "loss": 0.3177, "rewards/accuracies": 0.875, "rewards/chosen": 0.11848056316375732, "rewards/margins": 1.5183645486831665, "rewards/rejected": -1.3998838663101196, "step": 2688 }, { "epoch": 0.31, "learning_rate": 2.1017207070115884e-07, "logits/chosen": -2.825831174850464, "logits/rejected": -2.6222283840179443, "logps/chosen": -247.04901123046875, "logps/rejected": -203.26644897460938, "loss": 0.623, "rewards/accuracies": 0.625, "rewards/chosen": -0.5861104130744934, "rewards/margins": 0.5202609896659851, "rewards/rejected": -1.1063714027404785, "step": 2689 }, { "epoch": 0.31, "learning_rate": 2.101369542315346e-07, "logits/chosen": -3.076383590698242, "logits/rejected": -3.1518568992614746, "logps/chosen": -454.2213439941406, "logps/rejected": -299.42901611328125, "loss": 0.2745, "rewards/accuracies": 0.875, "rewards/chosen": 0.47566550970077515, "rewards/margins": 2.2643818855285645, "rewards/rejected": -1.7887163162231445, "step": 2690 }, { "epoch": 0.31, "learning_rate": 2.1010183776191032e-07, "logits/chosen": -3.569575309753418, "logits/rejected": -3.248314142227173, "logps/chosen": -285.6998291015625, "logps/rejected": -250.03831481933594, "loss": 0.3628, "rewards/accuracies": 0.875, "rewards/chosen": 0.2644709646701813, "rewards/margins": 1.9379606246948242, "rewards/rejected": -1.6734895706176758, "step": 2691 }, { "epoch": 0.31, "learning_rate": 2.1006672129228607e-07, "logits/chosen": -3.544778347015381, "logits/rejected": -3.777670383453369, "logps/chosen": -258.34637451171875, "logps/rejected": -272.9873352050781, "loss": 0.5142, "rewards/accuracies": 0.625, "rewards/chosen": -0.6007353067398071, "rewards/margins": 1.0859975814819336, "rewards/rejected": -1.6867328882217407, "step": 2692 }, { "epoch": 0.31, "learning_rate": 2.1003160482266183e-07, "logits/chosen": -3.152451992034912, "logits/rejected": -3.0305657386779785, "logps/chosen": -345.98406982421875, "logps/rejected": -336.1103515625, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": 0.412387490272522, "rewards/margins": 2.871246814727783, "rewards/rejected": -2.4588592052459717, "step": 2693 }, { "epoch": 0.31, "learning_rate": 2.0999648835303756e-07, "logits/chosen": -3.2075588703155518, "logits/rejected": -3.689764976501465, "logps/chosen": -313.42242431640625, "logps/rejected": -169.81350708007812, "loss": 0.5329, "rewards/accuracies": 0.75, "rewards/chosen": -0.5771796703338623, "rewards/margins": 0.5056952238082886, "rewards/rejected": -1.0828747749328613, "step": 2694 }, { "epoch": 0.31, "learning_rate": 2.099613718834133e-07, "logits/chosen": -2.710972547531128, "logits/rejected": -2.895327091217041, "logps/chosen": -350.9754638671875, "logps/rejected": -232.68905639648438, "loss": 0.4867, "rewards/accuracies": 0.625, "rewards/chosen": -0.036527350544929504, "rewards/margins": 1.3880201578140259, "rewards/rejected": -1.4245474338531494, "step": 2695 }, { "epoch": 0.31, "learning_rate": 2.0992625541378904e-07, "logits/chosen": -2.858287811279297, "logits/rejected": -3.0463531017303467, "logps/chosen": -313.5277404785156, "logps/rejected": -379.3931579589844, "loss": 0.4762, "rewards/accuracies": 0.625, "rewards/chosen": -0.31277066469192505, "rewards/margins": 1.923231601715088, "rewards/rejected": -2.236002206802368, "step": 2696 }, { "epoch": 0.31, "learning_rate": 2.098911389441648e-07, "logits/chosen": -3.0413875579833984, "logits/rejected": -3.0731945037841797, "logps/chosen": -278.4690856933594, "logps/rejected": -399.407958984375, "loss": 0.1736, "rewards/accuracies": 1.0, "rewards/chosen": 0.4262908101081848, "rewards/margins": 3.117654323577881, "rewards/rejected": -2.69136381149292, "step": 2697 }, { "epoch": 0.31, "learning_rate": 2.0985602247454057e-07, "logits/chosen": -3.3376400470733643, "logits/rejected": -2.549386501312256, "logps/chosen": -261.89373779296875, "logps/rejected": -132.71653747558594, "loss": 0.6113, "rewards/accuracies": 0.5, "rewards/chosen": -0.14452692866325378, "rewards/margins": 0.4512198567390442, "rewards/rejected": -0.5957468152046204, "step": 2698 }, { "epoch": 0.31, "learning_rate": 2.098209060049163e-07, "logits/chosen": -2.8589749336242676, "logits/rejected": -3.1143991947174072, "logps/chosen": -240.55950927734375, "logps/rejected": -253.86822509765625, "loss": 0.5735, "rewards/accuracies": 0.75, "rewards/chosen": -0.25031960010528564, "rewards/margins": 0.9200965166091919, "rewards/rejected": -1.1704161167144775, "step": 2699 }, { "epoch": 0.31, "learning_rate": 2.0978578953529205e-07, "logits/chosen": -3.168196678161621, "logits/rejected": -3.3921029567718506, "logps/chosen": -116.16915130615234, "logps/rejected": -297.52691650390625, "loss": 0.5029, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023165166378021, "rewards/margins": 1.0676770210266113, "rewards/rejected": -1.3699935674667358, "step": 2700 }, { "epoch": 0.31, "learning_rate": 2.097506730656678e-07, "logits/chosen": -3.9658989906311035, "logits/rejected": -3.6875076293945312, "logps/chosen": -161.37594604492188, "logps/rejected": -195.8558349609375, "loss": 0.81, "rewards/accuracies": 0.5, "rewards/chosen": -0.12227240204811096, "rewards/margins": 1.3459267616271973, "rewards/rejected": -1.4681992530822754, "step": 2701 }, { "epoch": 0.31, "learning_rate": 2.0971555659604353e-07, "logits/chosen": -2.984095811843872, "logits/rejected": -3.294487476348877, "logps/chosen": -370.7571716308594, "logps/rejected": -295.5445251464844, "loss": 0.5289, "rewards/accuracies": 0.75, "rewards/chosen": -0.05452115833759308, "rewards/margins": 0.6429303884506226, "rewards/rejected": -0.6974514722824097, "step": 2702 }, { "epoch": 0.31, "learning_rate": 2.096804401264193e-07, "logits/chosen": -2.801766872406006, "logits/rejected": -2.8464720249176025, "logps/chosen": -214.14891052246094, "logps/rejected": -252.0380096435547, "loss": 0.3709, "rewards/accuracies": 0.75, "rewards/chosen": -0.11697833240032196, "rewards/margins": 1.5305709838867188, "rewards/rejected": -1.6475491523742676, "step": 2703 }, { "epoch": 0.31, "learning_rate": 2.0964532365679501e-07, "logits/chosen": -2.698483943939209, "logits/rejected": -2.8396835327148438, "logps/chosen": -370.0849609375, "logps/rejected": -252.1450958251953, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": 0.08998577296733856, "rewards/margins": 2.55472469329834, "rewards/rejected": -2.4647388458251953, "step": 2704 }, { "epoch": 0.31, "learning_rate": 2.0961020718717077e-07, "logits/chosen": -3.1203620433807373, "logits/rejected": -3.3811442852020264, "logps/chosen": -209.79498291015625, "logps/rejected": -337.44732666015625, "loss": 0.3616, "rewards/accuracies": 0.875, "rewards/chosen": 0.20113682746887207, "rewards/margins": 2.4834189414978027, "rewards/rejected": -2.2822821140289307, "step": 2705 }, { "epoch": 0.31, "learning_rate": 2.0957509071754652e-07, "logits/chosen": -2.432766914367676, "logits/rejected": -2.4980971813201904, "logps/chosen": -319.7143859863281, "logps/rejected": -274.7018737792969, "loss": 0.431, "rewards/accuracies": 0.875, "rewards/chosen": -0.6734658479690552, "rewards/margins": 1.1623632907867432, "rewards/rejected": -1.8358290195465088, "step": 2706 }, { "epoch": 0.31, "learning_rate": 2.0953997424792225e-07, "logits/chosen": -2.96154522895813, "logits/rejected": -2.9254560470581055, "logps/chosen": -310.79266357421875, "logps/rejected": -329.29315185546875, "loss": 0.4107, "rewards/accuracies": 0.875, "rewards/chosen": 0.35735946893692017, "rewards/margins": 2.5898733139038086, "rewards/rejected": -2.232513904571533, "step": 2707 }, { "epoch": 0.31, "learning_rate": 2.09504857778298e-07, "logits/chosen": -2.600987434387207, "logits/rejected": -2.3445005416870117, "logps/chosen": -406.8525390625, "logps/rejected": -251.35968017578125, "loss": 0.3385, "rewards/accuracies": 0.875, "rewards/chosen": -0.05882759392261505, "rewards/margins": 1.2079129219055176, "rewards/rejected": -1.2667404413223267, "step": 2708 }, { "epoch": 0.31, "learning_rate": 2.0946974130867373e-07, "logits/chosen": -3.034916877746582, "logits/rejected": -2.986525774002075, "logps/chosen": -152.0174560546875, "logps/rejected": -325.87103271484375, "loss": 0.5273, "rewards/accuracies": 0.75, "rewards/chosen": -0.12014269828796387, "rewards/margins": 1.3156898021697998, "rewards/rejected": -1.4358325004577637, "step": 2709 }, { "epoch": 0.31, "learning_rate": 2.094346248390495e-07, "logits/chosen": -2.9180924892425537, "logits/rejected": -3.030306100845337, "logps/chosen": -335.950927734375, "logps/rejected": -276.42486572265625, "loss": 0.5605, "rewards/accuracies": 0.5, "rewards/chosen": 0.10493157804012299, "rewards/margins": 1.2087838649749756, "rewards/rejected": -1.1038521528244019, "step": 2710 }, { "epoch": 0.31, "learning_rate": 2.0939950836942526e-07, "logits/chosen": -3.11173415184021, "logits/rejected": -3.039310932159424, "logps/chosen": -160.8915557861328, "logps/rejected": -87.95643615722656, "loss": 0.8762, "rewards/accuracies": 0.5, "rewards/chosen": -0.7014458775520325, "rewards/margins": 0.0006791055202484131, "rewards/rejected": -0.7021249532699585, "step": 2711 }, { "epoch": 0.31, "learning_rate": 2.09364391899801e-07, "logits/chosen": -3.666139602661133, "logits/rejected": -3.310901403427124, "logps/chosen": -215.253173828125, "logps/rejected": -81.03885650634766, "loss": 0.988, "rewards/accuracies": 0.75, "rewards/chosen": -0.6212292909622192, "rewards/margins": 0.3523818552494049, "rewards/rejected": -0.9736111164093018, "step": 2712 }, { "epoch": 0.31, "learning_rate": 2.0932927543017675e-07, "logits/chosen": -2.936321496963501, "logits/rejected": -3.2686567306518555, "logps/chosen": -147.0630340576172, "logps/rejected": -218.07647705078125, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": 0.5394455194473267, "rewards/margins": 2.531698226928711, "rewards/rejected": -1.9922525882720947, "step": 2713 }, { "epoch": 0.31, "learning_rate": 2.092941589605525e-07, "logits/chosen": -2.443145751953125, "logits/rejected": -2.5913243293762207, "logps/chosen": -179.33511352539062, "logps/rejected": -228.26394653320312, "loss": 0.5721, "rewards/accuracies": 0.5, "rewards/chosen": -0.3532053828239441, "rewards/margins": 0.7078384757041931, "rewards/rejected": -1.0610438585281372, "step": 2714 }, { "epoch": 0.31, "learning_rate": 2.0925904249092823e-07, "logits/chosen": -2.9418888092041016, "logits/rejected": -3.1111104488372803, "logps/chosen": -305.97149658203125, "logps/rejected": -284.12255859375, "loss": 0.3957, "rewards/accuracies": 0.75, "rewards/chosen": -0.06284767389297485, "rewards/margins": 1.6648646593093872, "rewards/rejected": -1.7277122735977173, "step": 2715 }, { "epoch": 0.31, "learning_rate": 2.0922392602130398e-07, "logits/chosen": -2.8024463653564453, "logits/rejected": -2.684154987335205, "logps/chosen": -346.1974182128906, "logps/rejected": -294.5653076171875, "loss": 0.5388, "rewards/accuracies": 0.875, "rewards/chosen": 0.1463080495595932, "rewards/margins": 1.6074026823043823, "rewards/rejected": -1.461094856262207, "step": 2716 }, { "epoch": 0.31, "learning_rate": 2.091888095516797e-07, "logits/chosen": -2.924553871154785, "logits/rejected": -3.0525951385498047, "logps/chosen": -266.85491943359375, "logps/rejected": -217.34909057617188, "loss": 0.3502, "rewards/accuracies": 0.75, "rewards/chosen": -0.08117246627807617, "rewards/margins": 2.7042860984802246, "rewards/rejected": -2.78545880317688, "step": 2717 }, { "epoch": 0.31, "learning_rate": 2.0915369308205546e-07, "logits/chosen": -2.6071321964263916, "logits/rejected": -2.5663018226623535, "logps/chosen": -390.0578308105469, "logps/rejected": -307.9854736328125, "loss": 0.5368, "rewards/accuracies": 0.75, "rewards/chosen": -0.30872267484664917, "rewards/margins": 1.0495226383209229, "rewards/rejected": -1.3582452535629272, "step": 2718 }, { "epoch": 0.31, "learning_rate": 2.0911857661243124e-07, "logits/chosen": -2.9327125549316406, "logits/rejected": -2.772413730621338, "logps/chosen": -336.01568603515625, "logps/rejected": -425.3703308105469, "loss": 0.3806, "rewards/accuracies": 0.875, "rewards/chosen": 0.1350601315498352, "rewards/margins": 1.0636364221572876, "rewards/rejected": -0.9285762310028076, "step": 2719 }, { "epoch": 0.31, "learning_rate": 2.0908346014280694e-07, "logits/chosen": -3.5731375217437744, "logits/rejected": -3.603288412094116, "logps/chosen": -271.04486083984375, "logps/rejected": -201.68218994140625, "loss": 0.2945, "rewards/accuracies": 0.875, "rewards/chosen": 0.2796739339828491, "rewards/margins": 2.1193509101867676, "rewards/rejected": -1.839676856994629, "step": 2720 }, { "epoch": 0.31, "learning_rate": 2.0904834367318272e-07, "logits/chosen": -3.2600088119506836, "logits/rejected": -2.971642017364502, "logps/chosen": -156.05426025390625, "logps/rejected": -189.95968627929688, "loss": 0.8102, "rewards/accuracies": 0.625, "rewards/chosen": -0.6407157182693481, "rewards/margins": 0.3939170837402344, "rewards/rejected": -1.0346328020095825, "step": 2721 }, { "epoch": 0.31, "learning_rate": 2.0901322720355848e-07, "logits/chosen": -4.045426368713379, "logits/rejected": -3.636168956756592, "logps/chosen": -308.62548828125, "logps/rejected": -167.6669921875, "loss": 0.3873, "rewards/accuracies": 0.875, "rewards/chosen": 0.19946065545082092, "rewards/margins": 1.5284388065338135, "rewards/rejected": -1.3289780616760254, "step": 2722 }, { "epoch": 0.31, "learning_rate": 2.089781107339342e-07, "logits/chosen": -2.959185838699341, "logits/rejected": -3.1417200565338135, "logps/chosen": -321.5123596191406, "logps/rejected": -275.5606384277344, "loss": 0.6014, "rewards/accuracies": 0.75, "rewards/chosen": -0.11313152313232422, "rewards/margins": 0.844805121421814, "rewards/rejected": -0.9579366445541382, "step": 2723 }, { "epoch": 0.31, "learning_rate": 2.0894299426430996e-07, "logits/chosen": -3.3365931510925293, "logits/rejected": -3.814706325531006, "logps/chosen": -267.4439392089844, "logps/rejected": -366.5554504394531, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 0.7470001578330994, "rewards/margins": 5.158563137054443, "rewards/rejected": -4.411562442779541, "step": 2724 }, { "epoch": 0.31, "learning_rate": 2.0890787779468569e-07, "logits/chosen": -2.8866372108459473, "logits/rejected": -3.149526834487915, "logps/chosen": -148.21160888671875, "logps/rejected": -210.41029357910156, "loss": 0.4718, "rewards/accuracies": 0.625, "rewards/chosen": -0.10630086064338684, "rewards/margins": 1.1432745456695557, "rewards/rejected": -1.2495753765106201, "step": 2725 }, { "epoch": 0.31, "learning_rate": 2.0887276132506144e-07, "logits/chosen": -3.232724905014038, "logits/rejected": -2.647637367248535, "logps/chosen": -271.8125915527344, "logps/rejected": -185.46218872070312, "loss": 0.6035, "rewards/accuracies": 0.625, "rewards/chosen": -0.5131202340126038, "rewards/margins": 0.8920029401779175, "rewards/rejected": -1.4051231145858765, "step": 2726 }, { "epoch": 0.31, "learning_rate": 2.088376448554372e-07, "logits/chosen": -2.9529480934143066, "logits/rejected": -3.0681285858154297, "logps/chosen": -175.5751190185547, "logps/rejected": -304.4449157714844, "loss": 0.657, "rewards/accuracies": 0.5, "rewards/chosen": -0.2673693001270294, "rewards/margins": 0.7302308678627014, "rewards/rejected": -0.997600257396698, "step": 2727 }, { "epoch": 0.31, "learning_rate": 2.0880252838581292e-07, "logits/chosen": -2.607029438018799, "logits/rejected": -2.7416880130767822, "logps/chosen": -347.989013671875, "logps/rejected": -281.9116516113281, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": 0.15197797119617462, "rewards/margins": 3.1610162258148193, "rewards/rejected": -3.0090384483337402, "step": 2728 }, { "epoch": 0.31, "learning_rate": 2.0876741191618868e-07, "logits/chosen": -3.648715019226074, "logits/rejected": -3.5327646732330322, "logps/chosen": -212.24200439453125, "logps/rejected": -179.3642578125, "loss": 0.3657, "rewards/accuracies": 0.75, "rewards/chosen": -0.011868447065353394, "rewards/margins": 2.103546142578125, "rewards/rejected": -2.115414619445801, "step": 2729 }, { "epoch": 0.31, "learning_rate": 2.0873229544656446e-07, "logits/chosen": -3.250645399093628, "logits/rejected": -3.0961387157440186, "logps/chosen": -333.5579833984375, "logps/rejected": -286.25653076171875, "loss": 0.1929, "rewards/accuracies": 0.875, "rewards/chosen": 0.3795652389526367, "rewards/margins": 2.307131052017212, "rewards/rejected": -1.927565574645996, "step": 2730 }, { "epoch": 0.31, "learning_rate": 2.0869717897694016e-07, "logits/chosen": -3.1712851524353027, "logits/rejected": -2.850739002227783, "logps/chosen": -303.90948486328125, "logps/rejected": -318.20599365234375, "loss": 0.2678, "rewards/accuracies": 0.875, "rewards/chosen": -0.1232505738735199, "rewards/margins": 1.605102300643921, "rewards/rejected": -1.7283529043197632, "step": 2731 }, { "epoch": 0.31, "learning_rate": 2.0866206250731594e-07, "logits/chosen": -3.1232945919036865, "logits/rejected": -2.68102765083313, "logps/chosen": -292.6473083496094, "logps/rejected": -271.8005676269531, "loss": 0.398, "rewards/accuracies": 0.75, "rewards/chosen": 0.03885522484779358, "rewards/margins": 1.6380221843719482, "rewards/rejected": -1.5991668701171875, "step": 2732 }, { "epoch": 0.32, "learning_rate": 2.0862694603769166e-07, "logits/chosen": -2.438480854034424, "logits/rejected": -2.4167518615722656, "logps/chosen": -462.29327392578125, "logps/rejected": -441.25799560546875, "loss": 0.4538, "rewards/accuracies": 0.625, "rewards/chosen": 0.03119295835494995, "rewards/margins": 1.4113497734069824, "rewards/rejected": -1.3801567554473877, "step": 2733 }, { "epoch": 0.32, "learning_rate": 2.0859182956806742e-07, "logits/chosen": -3.5268936157226562, "logits/rejected": -3.4741175174713135, "logps/chosen": -241.01690673828125, "logps/rejected": -149.72216796875, "loss": 0.3056, "rewards/accuracies": 1.0, "rewards/chosen": -0.334955096244812, "rewards/margins": 2.012882709503174, "rewards/rejected": -2.3478379249572754, "step": 2734 }, { "epoch": 0.32, "learning_rate": 2.0855671309844317e-07, "logits/chosen": -3.6205995082855225, "logits/rejected": -3.635786533355713, "logps/chosen": -207.8341827392578, "logps/rejected": -207.9324951171875, "loss": 0.6389, "rewards/accuracies": 0.625, "rewards/chosen": 0.062294840812683105, "rewards/margins": 0.6435567140579224, "rewards/rejected": -0.5812618732452393, "step": 2735 }, { "epoch": 0.32, "learning_rate": 2.085215966288189e-07, "logits/chosen": -2.772484302520752, "logits/rejected": -2.720337390899658, "logps/chosen": -296.5679626464844, "logps/rejected": -242.85589599609375, "loss": 0.4225, "rewards/accuracies": 0.75, "rewards/chosen": 0.3278283178806305, "rewards/margins": 1.6699044704437256, "rewards/rejected": -1.342076063156128, "step": 2736 }, { "epoch": 0.32, "learning_rate": 2.0848648015919465e-07, "logits/chosen": -2.7820305824279785, "logits/rejected": -2.8744516372680664, "logps/chosen": -287.03204345703125, "logps/rejected": -233.9835205078125, "loss": 0.7323, "rewards/accuracies": 0.625, "rewards/chosen": -0.6748775243759155, "rewards/margins": 1.3397912979125977, "rewards/rejected": -2.0146689414978027, "step": 2737 }, { "epoch": 0.32, "learning_rate": 2.084513636895704e-07, "logits/chosen": -3.0112247467041016, "logits/rejected": -3.2624130249023438, "logps/chosen": -232.97894287109375, "logps/rejected": -292.38720703125, "loss": 0.25, "rewards/accuracies": 0.875, "rewards/chosen": 0.1521078199148178, "rewards/margins": 2.252563953399658, "rewards/rejected": -2.1004562377929688, "step": 2738 }, { "epoch": 0.32, "learning_rate": 2.0841624721994613e-07, "logits/chosen": -2.42109751701355, "logits/rejected": -2.6753807067871094, "logps/chosen": -214.0558624267578, "logps/rejected": -324.70733642578125, "loss": 0.882, "rewards/accuracies": 0.625, "rewards/chosen": -0.0905417650938034, "rewards/margins": 0.23565584421157837, "rewards/rejected": -0.32619768381118774, "step": 2739 }, { "epoch": 0.32, "learning_rate": 2.083811307503219e-07, "logits/chosen": -2.8686671257019043, "logits/rejected": -2.8672688007354736, "logps/chosen": -232.95529174804688, "logps/rejected": -292.22552490234375, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": -0.020238623023033142, "rewards/margins": 1.858863353729248, "rewards/rejected": -1.8791018724441528, "step": 2740 }, { "epoch": 0.32, "learning_rate": 2.0834601428069762e-07, "logits/chosen": -3.703939437866211, "logits/rejected": -3.371593952178955, "logps/chosen": -302.5903625488281, "logps/rejected": -235.95545959472656, "loss": 0.5862, "rewards/accuracies": 0.875, "rewards/chosen": -0.09825241565704346, "rewards/margins": 0.8189486265182495, "rewards/rejected": -0.917201042175293, "step": 2741 }, { "epoch": 0.32, "learning_rate": 2.0831089781107337e-07, "logits/chosen": -2.69975209236145, "logits/rejected": -3.0724833011627197, "logps/chosen": -229.7742156982422, "logps/rejected": -231.9947509765625, "loss": 0.3098, "rewards/accuracies": 0.875, "rewards/chosen": 0.3144189715385437, "rewards/margins": 2.1158127784729004, "rewards/rejected": -1.801393747329712, "step": 2742 }, { "epoch": 0.32, "learning_rate": 2.0827578134144915e-07, "logits/chosen": -3.380939245223999, "logits/rejected": -3.7571020126342773, "logps/chosen": -148.39996337890625, "logps/rejected": -160.85269165039062, "loss": 0.3082, "rewards/accuracies": 1.0, "rewards/chosen": 0.12602221965789795, "rewards/margins": 1.8812940120697021, "rewards/rejected": -1.7552719116210938, "step": 2743 }, { "epoch": 0.32, "learning_rate": 2.0824066487182488e-07, "logits/chosen": -2.8378567695617676, "logits/rejected": -2.9269256591796875, "logps/chosen": -484.6377868652344, "logps/rejected": -449.677734375, "loss": 0.3796, "rewards/accuracies": 0.875, "rewards/chosen": 0.0651644766330719, "rewards/margins": 1.9449522495269775, "rewards/rejected": -1.8797876834869385, "step": 2744 }, { "epoch": 0.32, "learning_rate": 2.0820554840220063e-07, "logits/chosen": -3.15161395072937, "logits/rejected": -2.8768653869628906, "logps/chosen": -261.294921875, "logps/rejected": -246.78607177734375, "loss": 0.6687, "rewards/accuracies": 0.75, "rewards/chosen": -0.5186463594436646, "rewards/margins": 0.7705070972442627, "rewards/rejected": -1.2891535758972168, "step": 2745 }, { "epoch": 0.32, "learning_rate": 2.0817043193257639e-07, "logits/chosen": -3.969273805618286, "logits/rejected": -3.685112476348877, "logps/chosen": -307.9874572753906, "logps/rejected": -215.06178283691406, "loss": 0.3798, "rewards/accuracies": 0.75, "rewards/chosen": 0.12640725076198578, "rewards/margins": 1.3675227165222168, "rewards/rejected": -1.2411154508590698, "step": 2746 }, { "epoch": 0.32, "learning_rate": 2.081353154629521e-07, "logits/chosen": -2.0217998027801514, "logits/rejected": -2.2769839763641357, "logps/chosen": -457.26739501953125, "logps/rejected": -249.7937469482422, "loss": 0.6075, "rewards/accuracies": 0.75, "rewards/chosen": -0.17885425686836243, "rewards/margins": 0.5669353008270264, "rewards/rejected": -0.7457895278930664, "step": 2747 }, { "epoch": 0.32, "learning_rate": 2.0810019899332787e-07, "logits/chosen": -3.174325942993164, "logits/rejected": -3.2788195610046387, "logps/chosen": -317.87310791015625, "logps/rejected": -322.45166015625, "loss": 0.3397, "rewards/accuracies": 0.75, "rewards/chosen": 0.18599393963813782, "rewards/margins": 1.6060364246368408, "rewards/rejected": -1.4200422763824463, "step": 2748 }, { "epoch": 0.32, "learning_rate": 2.080650825237036e-07, "logits/chosen": -3.426868200302124, "logits/rejected": -3.4244260787963867, "logps/chosen": -161.81173706054688, "logps/rejected": -268.3340148925781, "loss": 0.3584, "rewards/accuracies": 0.875, "rewards/chosen": -0.020454153418540955, "rewards/margins": 1.8233460187911987, "rewards/rejected": -1.8438003063201904, "step": 2749 }, { "epoch": 0.32, "learning_rate": 2.0802996605407935e-07, "logits/chosen": -2.4972128868103027, "logits/rejected": -2.537996292114258, "logps/chosen": -182.00807189941406, "logps/rejected": -166.27622985839844, "loss": 0.558, "rewards/accuracies": 0.75, "rewards/chosen": 0.4161045253276825, "rewards/margins": 0.886556088924408, "rewards/rejected": -0.4704515337944031, "step": 2750 }, { "epoch": 0.32, "learning_rate": 2.079948495844551e-07, "logits/chosen": -3.267488718032837, "logits/rejected": -3.155862808227539, "logps/chosen": -194.3003387451172, "logps/rejected": -231.23731994628906, "loss": 0.3789, "rewards/accuracies": 0.625, "rewards/chosen": -0.06987342238426208, "rewards/margins": 1.602082371711731, "rewards/rejected": -1.6719558238983154, "step": 2751 }, { "epoch": 0.32, "learning_rate": 2.0795973311483083e-07, "logits/chosen": -2.986726760864258, "logits/rejected": -3.078307867050171, "logps/chosen": -269.416748046875, "logps/rejected": -257.9798889160156, "loss": 0.6469, "rewards/accuracies": 0.5, "rewards/chosen": -0.6248037815093994, "rewards/margins": 0.4553123414516449, "rewards/rejected": -1.0801160335540771, "step": 2752 }, { "epoch": 0.32, "learning_rate": 2.079246166452066e-07, "logits/chosen": -2.354102611541748, "logits/rejected": -2.6754047870635986, "logps/chosen": -321.333251953125, "logps/rejected": -125.45684051513672, "loss": 0.3747, "rewards/accuracies": 1.0, "rewards/chosen": 0.016368567943572998, "rewards/margins": 0.8395527601242065, "rewards/rejected": -0.8231841325759888, "step": 2753 }, { "epoch": 0.32, "learning_rate": 2.0788950017558236e-07, "logits/chosen": -2.1088876724243164, "logits/rejected": -1.9361817836761475, "logps/chosen": -321.8093566894531, "logps/rejected": -336.0527648925781, "loss": 0.3652, "rewards/accuracies": 0.875, "rewards/chosen": 0.003968283534049988, "rewards/margins": 1.2352946996688843, "rewards/rejected": -1.2313264608383179, "step": 2754 }, { "epoch": 0.32, "learning_rate": 2.078543837059581e-07, "logits/chosen": -3.2046656608581543, "logits/rejected": -2.6928887367248535, "logps/chosen": -192.21133422851562, "logps/rejected": -243.51084899902344, "loss": 0.4363, "rewards/accuracies": 0.875, "rewards/chosen": -0.14992228150367737, "rewards/margins": 0.8116161823272705, "rewards/rejected": -0.9615384340286255, "step": 2755 }, { "epoch": 0.32, "learning_rate": 2.0781926723633384e-07, "logits/chosen": -2.474734306335449, "logits/rejected": -2.4608330726623535, "logps/chosen": -262.71148681640625, "logps/rejected": -251.32855224609375, "loss": 0.2938, "rewards/accuracies": 0.875, "rewards/chosen": 0.13674023747444153, "rewards/margins": 1.8013579845428467, "rewards/rejected": -1.664617657661438, "step": 2756 }, { "epoch": 0.32, "learning_rate": 2.0778415076670957e-07, "logits/chosen": -2.46675968170166, "logits/rejected": -2.7378435134887695, "logps/chosen": -218.49761962890625, "logps/rejected": -258.50616455078125, "loss": 0.4627, "rewards/accuracies": 0.75, "rewards/chosen": -0.20747564733028412, "rewards/margins": 1.7474000453948975, "rewards/rejected": -1.9548757076263428, "step": 2757 }, { "epoch": 0.32, "learning_rate": 2.0774903429708533e-07, "logits/chosen": -2.959700107574463, "logits/rejected": -3.173567771911621, "logps/chosen": -257.94110107421875, "logps/rejected": -202.59756469726562, "loss": 0.5463, "rewards/accuracies": 0.75, "rewards/chosen": 8.59573483467102e-05, "rewards/margins": 0.7057973742485046, "rewards/rejected": -0.705711305141449, "step": 2758 }, { "epoch": 0.32, "learning_rate": 2.0771391782746108e-07, "logits/chosen": -2.595471143722534, "logits/rejected": -2.696042060852051, "logps/chosen": -323.83038330078125, "logps/rejected": -326.8404541015625, "loss": 0.5636, "rewards/accuracies": 0.5, "rewards/chosen": -0.28396448493003845, "rewards/margins": 1.4943914413452148, "rewards/rejected": -1.7783560752868652, "step": 2759 }, { "epoch": 0.32, "learning_rate": 2.076788013578368e-07, "logits/chosen": -3.587393283843994, "logits/rejected": -3.5495142936706543, "logps/chosen": -138.2574005126953, "logps/rejected": -197.32254028320312, "loss": 0.2899, "rewards/accuracies": 0.875, "rewards/chosen": 0.5710176825523376, "rewards/margins": 2.0169577598571777, "rewards/rejected": -1.4459400177001953, "step": 2760 }, { "epoch": 0.32, "learning_rate": 2.0764368488821256e-07, "logits/chosen": -2.503964424133301, "logits/rejected": -2.3353896141052246, "logps/chosen": -277.02130126953125, "logps/rejected": -181.201416015625, "loss": 0.3995, "rewards/accuracies": 0.75, "rewards/chosen": -0.08484504371881485, "rewards/margins": 1.4066998958587646, "rewards/rejected": -1.4915450811386108, "step": 2761 }, { "epoch": 0.32, "learning_rate": 2.076085684185883e-07, "logits/chosen": -3.5791070461273193, "logits/rejected": -3.2346675395965576, "logps/chosen": -330.312744140625, "logps/rejected": -341.7666931152344, "loss": 0.5586, "rewards/accuracies": 0.75, "rewards/chosen": -0.4902004599571228, "rewards/margins": 1.5270874500274658, "rewards/rejected": -2.0172882080078125, "step": 2762 }, { "epoch": 0.32, "learning_rate": 2.0757345194896404e-07, "logits/chosen": -3.3797836303710938, "logits/rejected": -3.712411403656006, "logps/chosen": -243.3679656982422, "logps/rejected": -208.43618774414062, "loss": 0.381, "rewards/accuracies": 0.75, "rewards/chosen": 0.051359936594963074, "rewards/margins": 2.0788116455078125, "rewards/rejected": -2.027451515197754, "step": 2763 }, { "epoch": 0.32, "learning_rate": 2.0753833547933982e-07, "logits/chosen": -2.7436885833740234, "logits/rejected": -2.8331990242004395, "logps/chosen": -218.19393920898438, "logps/rejected": -211.0985107421875, "loss": 0.3063, "rewards/accuracies": 0.875, "rewards/chosen": 0.15266427397727966, "rewards/margins": 1.4829375743865967, "rewards/rejected": -1.3302732706069946, "step": 2764 }, { "epoch": 0.32, "learning_rate": 2.0750321900971552e-07, "logits/chosen": -3.272725820541382, "logits/rejected": -3.317335605621338, "logps/chosen": -221.25201416015625, "logps/rejected": -351.0550231933594, "loss": 0.1994, "rewards/accuracies": 1.0, "rewards/chosen": 0.40522170066833496, "rewards/margins": 2.570573568344116, "rewards/rejected": -2.1653518676757812, "step": 2765 }, { "epoch": 0.32, "learning_rate": 2.074681025400913e-07, "logits/chosen": -3.786918878555298, "logits/rejected": -3.7598862648010254, "logps/chosen": -221.38348388671875, "logps/rejected": -287.49920654296875, "loss": 0.3288, "rewards/accuracies": 0.75, "rewards/chosen": -0.20213985443115234, "rewards/margins": 2.4138193130493164, "rewards/rejected": -2.6159591674804688, "step": 2766 }, { "epoch": 0.32, "learning_rate": 2.0743298607046706e-07, "logits/chosen": -2.935328960418701, "logits/rejected": -3.161776065826416, "logps/chosen": -266.54510498046875, "logps/rejected": -200.47454833984375, "loss": 0.6284, "rewards/accuracies": 0.375, "rewards/chosen": -0.5107806324958801, "rewards/margins": 1.097649097442627, "rewards/rejected": -1.6084296703338623, "step": 2767 }, { "epoch": 0.32, "learning_rate": 2.0739786960084278e-07, "logits/chosen": -3.1065666675567627, "logits/rejected": -3.133519411087036, "logps/chosen": -208.64923095703125, "logps/rejected": -164.1324462890625, "loss": 0.3468, "rewards/accuracies": 1.0, "rewards/chosen": 0.267901748418808, "rewards/margins": 1.2319071292877197, "rewards/rejected": -0.9640053510665894, "step": 2768 }, { "epoch": 0.32, "learning_rate": 2.0736275313121854e-07, "logits/chosen": -3.0599682331085205, "logits/rejected": -3.438636064529419, "logps/chosen": -126.22867584228516, "logps/rejected": -323.46881103515625, "loss": 0.1529, "rewards/accuracies": 1.0, "rewards/chosen": 0.11474213004112244, "rewards/margins": 5.443012237548828, "rewards/rejected": -5.328269958496094, "step": 2769 }, { "epoch": 0.32, "learning_rate": 2.0732763666159427e-07, "logits/chosen": -2.548311948776245, "logits/rejected": -2.6767163276672363, "logps/chosen": -273.38616943359375, "logps/rejected": -240.0062713623047, "loss": 0.3454, "rewards/accuracies": 1.0, "rewards/chosen": 0.2300184965133667, "rewards/margins": 1.1098237037658691, "rewards/rejected": -0.8798051476478577, "step": 2770 }, { "epoch": 0.32, "learning_rate": 2.0729252019197002e-07, "logits/chosen": -2.5090506076812744, "logits/rejected": -3.0814170837402344, "logps/chosen": -130.109130859375, "logps/rejected": -165.18136596679688, "loss": 0.6606, "rewards/accuracies": 0.5, "rewards/chosen": -0.07737506181001663, "rewards/margins": 1.5364673137664795, "rewards/rejected": -1.613842248916626, "step": 2771 }, { "epoch": 0.32, "learning_rate": 2.0725740372234577e-07, "logits/chosen": -2.8526933193206787, "logits/rejected": -2.7082886695861816, "logps/chosen": -211.28614807128906, "logps/rejected": -276.4790954589844, "loss": 0.4884, "rewards/accuracies": 0.75, "rewards/chosen": -0.22731192409992218, "rewards/margins": 1.1562751531600952, "rewards/rejected": -1.383587121963501, "step": 2772 }, { "epoch": 0.32, "learning_rate": 2.072222872527215e-07, "logits/chosen": -3.091115951538086, "logits/rejected": -3.334754467010498, "logps/chosen": -289.0819396972656, "logps/rejected": -285.72930908203125, "loss": 0.3764, "rewards/accuracies": 0.875, "rewards/chosen": 0.12595519423484802, "rewards/margins": 1.5188894271850586, "rewards/rejected": -1.3929342031478882, "step": 2773 }, { "epoch": 0.32, "learning_rate": 2.0718717078309725e-07, "logits/chosen": -3.1591386795043945, "logits/rejected": -3.301708936691284, "logps/chosen": -128.5341339111328, "logps/rejected": -224.38369750976562, "loss": 0.3912, "rewards/accuracies": 0.75, "rewards/chosen": -0.048657774925231934, "rewards/margins": 1.9820069074630737, "rewards/rejected": -2.0306649208068848, "step": 2774 }, { "epoch": 0.32, "learning_rate": 2.0715205431347304e-07, "logits/chosen": -2.909804344177246, "logits/rejected": -3.3829526901245117, "logps/chosen": -144.20606994628906, "logps/rejected": -278.80078125, "loss": 0.3897, "rewards/accuracies": 0.875, "rewards/chosen": -0.14013497531414032, "rewards/margins": 2.7419135570526123, "rewards/rejected": -2.8820486068725586, "step": 2775 }, { "epoch": 0.32, "learning_rate": 2.0711693784384874e-07, "logits/chosen": -2.79217529296875, "logits/rejected": -2.8686933517456055, "logps/chosen": -139.03973388671875, "logps/rejected": -317.21795654296875, "loss": 0.3667, "rewards/accuracies": 0.875, "rewards/chosen": -0.05444703996181488, "rewards/margins": 2.672330617904663, "rewards/rejected": -2.7267777919769287, "step": 2776 }, { "epoch": 0.32, "learning_rate": 2.0708182137422452e-07, "logits/chosen": -2.554117202758789, "logits/rejected": -2.5956289768218994, "logps/chosen": -413.4727783203125, "logps/rejected": -317.3256530761719, "loss": 0.3032, "rewards/accuracies": 0.875, "rewards/chosen": 0.07327502965927124, "rewards/margins": 1.98712158203125, "rewards/rejected": -1.9138466119766235, "step": 2777 }, { "epoch": 0.32, "learning_rate": 2.0704670490460024e-07, "logits/chosen": -2.46449613571167, "logits/rejected": -2.649974822998047, "logps/chosen": -416.12799072265625, "logps/rejected": -309.6974182128906, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": 0.34918850660324097, "rewards/margins": 1.780132532119751, "rewards/rejected": -1.4309438467025757, "step": 2778 }, { "epoch": 0.32, "learning_rate": 2.07011588434976e-07, "logits/chosen": -3.181380271911621, "logits/rejected": -3.1958529949188232, "logps/chosen": -235.20460510253906, "logps/rejected": -182.61184692382812, "loss": 0.3187, "rewards/accuracies": 1.0, "rewards/chosen": -0.024216219782829285, "rewards/margins": 1.2108616828918457, "rewards/rejected": -1.2350780963897705, "step": 2779 }, { "epoch": 0.32, "learning_rate": 2.0697647196535175e-07, "logits/chosen": -3.7424967288970947, "logits/rejected": -3.408094882965088, "logps/chosen": -135.8411407470703, "logps/rejected": -173.82858276367188, "loss": 0.2339, "rewards/accuracies": 0.875, "rewards/chosen": 0.12902896106243134, "rewards/margins": 2.2972166538238525, "rewards/rejected": -2.1681878566741943, "step": 2780 }, { "epoch": 0.32, "learning_rate": 2.0694135549572748e-07, "logits/chosen": -2.892028331756592, "logits/rejected": -2.8821470737457275, "logps/chosen": -240.58624267578125, "logps/rejected": -223.71792602539062, "loss": 0.5902, "rewards/accuracies": 0.75, "rewards/chosen": -0.6565995216369629, "rewards/margins": 1.448161244392395, "rewards/rejected": -2.1047606468200684, "step": 2781 }, { "epoch": 0.32, "learning_rate": 2.0690623902610323e-07, "logits/chosen": -3.3885231018066406, "logits/rejected": -3.45500111579895, "logps/chosen": -266.329833984375, "logps/rejected": -124.86180877685547, "loss": 0.6957, "rewards/accuracies": 0.625, "rewards/chosen": -0.24081218242645264, "rewards/margins": 1.135359764099121, "rewards/rejected": -1.3761719465255737, "step": 2782 }, { "epoch": 0.32, "learning_rate": 2.0687112255647899e-07, "logits/chosen": -2.9476523399353027, "logits/rejected": -3.3540115356445312, "logps/chosen": -206.1102752685547, "logps/rejected": -278.2396240234375, "loss": 0.1709, "rewards/accuracies": 1.0, "rewards/chosen": 0.33596497774124146, "rewards/margins": 2.307621955871582, "rewards/rejected": -1.9716570377349854, "step": 2783 }, { "epoch": 0.32, "learning_rate": 2.0683600608685471e-07, "logits/chosen": -3.0456900596618652, "logits/rejected": -3.1665358543395996, "logps/chosen": -352.8702087402344, "logps/rejected": -287.72796630859375, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": -0.14109206199645996, "rewards/margins": 3.021542549133301, "rewards/rejected": -3.16263484954834, "step": 2784 }, { "epoch": 0.32, "learning_rate": 2.0680088961723047e-07, "logits/chosen": -3.062130928039551, "logits/rejected": -3.2230515480041504, "logps/chosen": -163.9191131591797, "logps/rejected": -199.23892211914062, "loss": 0.4972, "rewards/accuracies": 0.75, "rewards/chosen": -0.09201288968324661, "rewards/margins": 1.4529011249542236, "rewards/rejected": -1.5449140071868896, "step": 2785 }, { "epoch": 0.32, "learning_rate": 2.067657731476062e-07, "logits/chosen": -2.8953661918640137, "logits/rejected": -2.7664976119995117, "logps/chosen": -230.72195434570312, "logps/rejected": -211.48867797851562, "loss": 0.4531, "rewards/accuracies": 0.75, "rewards/chosen": 0.14753729104995728, "rewards/margins": 1.2386822700500488, "rewards/rejected": -1.0911450386047363, "step": 2786 }, { "epoch": 0.32, "learning_rate": 2.0673065667798198e-07, "logits/chosen": -4.048678398132324, "logits/rejected": -3.816167116165161, "logps/chosen": -160.11972045898438, "logps/rejected": -149.6986541748047, "loss": 0.7961, "rewards/accuracies": 0.625, "rewards/chosen": -0.3054066598415375, "rewards/margins": 0.9730106592178345, "rewards/rejected": -1.2784172296524048, "step": 2787 }, { "epoch": 0.32, "learning_rate": 2.0669554020835773e-07, "logits/chosen": -2.844991683959961, "logits/rejected": -2.9796016216278076, "logps/chosen": -180.3058624267578, "logps/rejected": -466.3212890625, "loss": 0.3845, "rewards/accuracies": 0.625, "rewards/chosen": 0.06598556041717529, "rewards/margins": 1.9717737436294556, "rewards/rejected": -1.9057883024215698, "step": 2788 }, { "epoch": 0.32, "learning_rate": 2.0666042373873346e-07, "logits/chosen": -3.070272445678711, "logits/rejected": -3.1582517623901367, "logps/chosen": -260.7377014160156, "logps/rejected": -177.84510803222656, "loss": 0.8234, "rewards/accuracies": 0.25, "rewards/chosen": -0.8787546753883362, "rewards/margins": -0.21073278784751892, "rewards/rejected": -0.6680218577384949, "step": 2789 }, { "epoch": 0.32, "learning_rate": 2.066253072691092e-07, "logits/chosen": -3.138500213623047, "logits/rejected": -2.7797598838806152, "logps/chosen": -315.20343017578125, "logps/rejected": -388.59283447265625, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": -0.2606803774833679, "rewards/margins": 1.995790958404541, "rewards/rejected": -2.2564713954925537, "step": 2790 }, { "epoch": 0.32, "learning_rate": 2.0659019079948496e-07, "logits/chosen": -2.8877885341644287, "logits/rejected": -3.1086130142211914, "logps/chosen": -294.93609619140625, "logps/rejected": -224.794921875, "loss": 0.3392, "rewards/accuracies": 0.875, "rewards/chosen": 0.5658361911773682, "rewards/margins": 2.3008816242218018, "rewards/rejected": -1.7350455522537231, "step": 2791 }, { "epoch": 0.32, "learning_rate": 2.065550743298607e-07, "logits/chosen": -3.656029224395752, "logits/rejected": -3.45927095413208, "logps/chosen": -233.8111572265625, "logps/rejected": -216.8090362548828, "loss": 0.5791, "rewards/accuracies": 0.625, "rewards/chosen": -0.552493691444397, "rewards/margins": 1.6606298685073853, "rewards/rejected": -2.213123321533203, "step": 2792 }, { "epoch": 0.32, "learning_rate": 2.0651995786023645e-07, "logits/chosen": -3.5274083614349365, "logits/rejected": -3.4105100631713867, "logps/chosen": -196.47276306152344, "logps/rejected": -199.6464080810547, "loss": 0.3224, "rewards/accuracies": 0.875, "rewards/chosen": 0.5164499878883362, "rewards/margins": 2.451303720474243, "rewards/rejected": -1.9348536729812622, "step": 2793 }, { "epoch": 0.32, "learning_rate": 2.0648484139061217e-07, "logits/chosen": -3.0147829055786133, "logits/rejected": -2.6869168281555176, "logps/chosen": -394.6365661621094, "logps/rejected": -337.3128662109375, "loss": 0.2182, "rewards/accuracies": 1.0, "rewards/chosen": -0.10059966146945953, "rewards/margins": 2.2805778980255127, "rewards/rejected": -2.3811776638031006, "step": 2794 }, { "epoch": 0.32, "learning_rate": 2.0644972492098793e-07, "logits/chosen": -3.406358242034912, "logits/rejected": -3.2000293731689453, "logps/chosen": -259.47711181640625, "logps/rejected": -237.4225616455078, "loss": 0.3314, "rewards/accuracies": 0.875, "rewards/chosen": -0.025759266689419746, "rewards/margins": 1.5062122344970703, "rewards/rejected": -1.5319716930389404, "step": 2795 }, { "epoch": 0.32, "learning_rate": 2.0641460845136368e-07, "logits/chosen": -3.4297091960906982, "logits/rejected": -3.576140880584717, "logps/chosen": -277.160888671875, "logps/rejected": -592.968017578125, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": 0.10689663141965866, "rewards/margins": 2.199827194213867, "rewards/rejected": -2.092930555343628, "step": 2796 }, { "epoch": 0.32, "learning_rate": 2.063794919817394e-07, "logits/chosen": -3.271301746368408, "logits/rejected": -3.4515151977539062, "logps/chosen": -228.8732452392578, "logps/rejected": -218.2284393310547, "loss": 0.5952, "rewards/accuracies": 0.625, "rewards/chosen": -0.722952127456665, "rewards/margins": 0.5262311100959778, "rewards/rejected": -1.249183177947998, "step": 2797 }, { "epoch": 0.32, "learning_rate": 2.063443755121152e-07, "logits/chosen": -3.462169647216797, "logits/rejected": -3.5262911319732666, "logps/chosen": -180.69976806640625, "logps/rejected": -245.34774780273438, "loss": 0.3497, "rewards/accuracies": 0.75, "rewards/chosen": -0.2832827568054199, "rewards/margins": 2.034862518310547, "rewards/rejected": -2.318145275115967, "step": 2798 }, { "epoch": 0.32, "learning_rate": 2.0630925904249094e-07, "logits/chosen": -2.850862503051758, "logits/rejected": -2.941068649291992, "logps/chosen": -196.75091552734375, "logps/rejected": -187.80892944335938, "loss": 0.5323, "rewards/accuracies": 0.625, "rewards/chosen": -0.4563661813735962, "rewards/margins": 0.8169429302215576, "rewards/rejected": -1.2733091115951538, "step": 2799 }, { "epoch": 0.32, "learning_rate": 2.0627414257286667e-07, "logits/chosen": -3.767493486404419, "logits/rejected": -3.849728584289551, "logps/chosen": -185.01150512695312, "logps/rejected": -310.3260498046875, "loss": 0.2299, "rewards/accuracies": 1.0, "rewards/chosen": -0.06735554337501526, "rewards/margins": 2.8156516551971436, "rewards/rejected": -2.883007049560547, "step": 2800 }, { "epoch": 0.32, "learning_rate": 2.0623902610324242e-07, "logits/chosen": -3.025587558746338, "logits/rejected": -2.7623236179351807, "logps/chosen": -162.96011352539062, "logps/rejected": -173.7723846435547, "loss": 0.5137, "rewards/accuracies": 0.875, "rewards/chosen": -0.053438737988471985, "rewards/margins": 0.9953893423080444, "rewards/rejected": -1.048828125, "step": 2801 }, { "epoch": 0.32, "learning_rate": 2.0620390963361815e-07, "logits/chosen": -2.5162322521209717, "logits/rejected": -2.9308698177337646, "logps/chosen": -311.500244140625, "logps/rejected": -310.20361328125, "loss": 0.3918, "rewards/accuracies": 0.75, "rewards/chosen": -0.3595004677772522, "rewards/margins": 1.6310007572174072, "rewards/rejected": -1.9905012845993042, "step": 2802 }, { "epoch": 0.32, "learning_rate": 2.061687931639939e-07, "logits/chosen": -3.3017165660858154, "logits/rejected": -3.515655040740967, "logps/chosen": -176.8379669189453, "logps/rejected": -208.91122436523438, "loss": 0.3498, "rewards/accuracies": 0.75, "rewards/chosen": 0.21133531630039215, "rewards/margins": 1.9963289499282837, "rewards/rejected": -1.7849936485290527, "step": 2803 }, { "epoch": 0.32, "learning_rate": 2.0613367669436966e-07, "logits/chosen": -2.5642123222351074, "logits/rejected": -2.490652561187744, "logps/chosen": -448.76416015625, "logps/rejected": -173.724853515625, "loss": 0.2261, "rewards/accuracies": 1.0, "rewards/chosen": 0.9064850807189941, "rewards/margins": 1.7373032569885254, "rewards/rejected": -0.8308181762695312, "step": 2804 }, { "epoch": 0.32, "learning_rate": 2.0609856022474539e-07, "logits/chosen": -3.548060894012451, "logits/rejected": -3.19390869140625, "logps/chosen": -243.50192260742188, "logps/rejected": -245.09463500976562, "loss": 0.4526, "rewards/accuracies": 0.75, "rewards/chosen": 0.09417843073606491, "rewards/margins": 1.1738923788070679, "rewards/rejected": -1.0797138214111328, "step": 2805 }, { "epoch": 0.32, "learning_rate": 2.0606344375512114e-07, "logits/chosen": -2.5123791694641113, "logits/rejected": -2.4777791500091553, "logps/chosen": -459.89471435546875, "logps/rejected": -254.26239013671875, "loss": 0.5917, "rewards/accuracies": 0.75, "rewards/chosen": -0.14879187941551208, "rewards/margins": 0.48147064447402954, "rewards/rejected": -0.630262553691864, "step": 2806 }, { "epoch": 0.32, "learning_rate": 2.0602832728549687e-07, "logits/chosen": -2.391932249069214, "logits/rejected": -2.4828383922576904, "logps/chosen": -355.3644104003906, "logps/rejected": -379.36676025390625, "loss": 0.2492, "rewards/accuracies": 0.875, "rewards/chosen": -0.04231920838356018, "rewards/margins": 2.014566421508789, "rewards/rejected": -2.0568857192993164, "step": 2807 }, { "epoch": 0.32, "learning_rate": 2.0599321081587262e-07, "logits/chosen": -3.1494741439819336, "logits/rejected": -3.4154911041259766, "logps/chosen": -219.63803100585938, "logps/rejected": -263.78009033203125, "loss": 0.2959, "rewards/accuracies": 0.875, "rewards/chosen": -0.16116540133953094, "rewards/margins": 2.823303699493408, "rewards/rejected": -2.984469175338745, "step": 2808 }, { "epoch": 0.32, "learning_rate": 2.059580943462484e-07, "logits/chosen": -3.585617780685425, "logits/rejected": -3.779629945755005, "logps/chosen": -391.9552001953125, "logps/rejected": -288.7772216796875, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": 0.0698586106300354, "rewards/margins": 1.5362114906311035, "rewards/rejected": -1.466352939605713, "step": 2809 }, { "epoch": 0.32, "learning_rate": 2.059229778766241e-07, "logits/chosen": -2.9250593185424805, "logits/rejected": -3.21315860748291, "logps/chosen": -180.68344116210938, "logps/rejected": -217.66542053222656, "loss": 0.5711, "rewards/accuracies": 0.625, "rewards/chosen": 0.024542585015296936, "rewards/margins": 0.9317641854286194, "rewards/rejected": -0.9072216749191284, "step": 2810 }, { "epoch": 0.32, "learning_rate": 2.0588786140699988e-07, "logits/chosen": -3.785414934158325, "logits/rejected": -3.5212647914886475, "logps/chosen": -331.46636962890625, "logps/rejected": -310.186767578125, "loss": 0.3021, "rewards/accuracies": 0.75, "rewards/chosen": -0.1537300944328308, "rewards/margins": 1.9160597324371338, "rewards/rejected": -2.0697898864746094, "step": 2811 }, { "epoch": 0.32, "learning_rate": 2.0585274493737564e-07, "logits/chosen": -3.8488528728485107, "logits/rejected": -3.7776103019714355, "logps/chosen": -181.44529724121094, "logps/rejected": -235.0676727294922, "loss": 0.2383, "rewards/accuracies": 1.0, "rewards/chosen": 0.27591148018836975, "rewards/margins": 2.671863079071045, "rewards/rejected": -2.395951747894287, "step": 2812 }, { "epoch": 0.32, "learning_rate": 2.0581762846775136e-07, "logits/chosen": -2.9657726287841797, "logits/rejected": -3.0691418647766113, "logps/chosen": -293.22412109375, "logps/rejected": -170.36061096191406, "loss": 0.7891, "rewards/accuracies": 0.625, "rewards/chosen": -0.0776626467704773, "rewards/margins": 0.3171939253807068, "rewards/rejected": -0.3948565721511841, "step": 2813 }, { "epoch": 0.32, "learning_rate": 2.0578251199812712e-07, "logits/chosen": -3.835513114929199, "logits/rejected": -3.7529373168945312, "logps/chosen": -291.42938232421875, "logps/rejected": -264.9427490234375, "loss": 0.3544, "rewards/accuracies": 0.875, "rewards/chosen": -0.1858813464641571, "rewards/margins": 1.128273606300354, "rewards/rejected": -1.314155101776123, "step": 2814 }, { "epoch": 0.32, "learning_rate": 2.0574739552850285e-07, "logits/chosen": -3.304427146911621, "logits/rejected": -3.129387617111206, "logps/chosen": -331.84478759765625, "logps/rejected": -178.6880340576172, "loss": 0.4213, "rewards/accuracies": 0.875, "rewards/chosen": 0.19511562585830688, "rewards/margins": 0.9570579528808594, "rewards/rejected": -0.7619423866271973, "step": 2815 }, { "epoch": 0.32, "learning_rate": 2.057122790588786e-07, "logits/chosen": -3.395862102508545, "logits/rejected": -3.4491450786590576, "logps/chosen": -218.5532989501953, "logps/rejected": -176.27085876464844, "loss": 0.3029, "rewards/accuracies": 0.75, "rewards/chosen": 0.30331024527549744, "rewards/margins": 1.599184274673462, "rewards/rejected": -1.2958738803863525, "step": 2816 }, { "epoch": 0.32, "learning_rate": 2.0567716258925435e-07, "logits/chosen": -3.0377089977264404, "logits/rejected": -3.1854615211486816, "logps/chosen": -200.83856201171875, "logps/rejected": -249.68304443359375, "loss": 0.296, "rewards/accuracies": 1.0, "rewards/chosen": 0.07921233773231506, "rewards/margins": 1.6125292778015137, "rewards/rejected": -1.5333170890808105, "step": 2817 }, { "epoch": 0.32, "learning_rate": 2.0564204611963008e-07, "logits/chosen": -3.005790948867798, "logits/rejected": -3.1264028549194336, "logps/chosen": -309.28253173828125, "logps/rejected": -301.784912109375, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": 0.4166223704814911, "rewards/margins": 2.5665364265441895, "rewards/rejected": -2.149913787841797, "step": 2818 }, { "epoch": 0.32, "learning_rate": 2.0560692965000583e-07, "logits/chosen": -3.5147624015808105, "logits/rejected": -3.195995569229126, "logps/chosen": -373.454345703125, "logps/rejected": -318.0096435546875, "loss": 0.217, "rewards/accuracies": 1.0, "rewards/chosen": -0.49907389283180237, "rewards/margins": 2.2315163612365723, "rewards/rejected": -2.7305901050567627, "step": 2819 }, { "epoch": 0.33, "learning_rate": 2.0557181318038161e-07, "logits/chosen": -2.7856035232543945, "logits/rejected": -2.7912845611572266, "logps/chosen": -140.02053833007812, "logps/rejected": -142.15567016601562, "loss": 0.486, "rewards/accuracies": 0.75, "rewards/chosen": 0.09772336483001709, "rewards/margins": 0.6865450739860535, "rewards/rejected": -0.5888217091560364, "step": 2820 }, { "epoch": 0.33, "learning_rate": 2.0553669671075734e-07, "logits/chosen": -3.6302576065063477, "logits/rejected": -3.485506772994995, "logps/chosen": -252.49411010742188, "logps/rejected": -312.7757568359375, "loss": 0.307, "rewards/accuracies": 0.875, "rewards/chosen": 0.2792898714542389, "rewards/margins": 1.900126338005066, "rewards/rejected": -1.6208364963531494, "step": 2821 }, { "epoch": 0.33, "learning_rate": 2.055015802411331e-07, "logits/chosen": -3.8503851890563965, "logits/rejected": -3.8455076217651367, "logps/chosen": -222.40640258789062, "logps/rejected": -268.122314453125, "loss": 0.3039, "rewards/accuracies": 0.875, "rewards/chosen": -0.012696027755737305, "rewards/margins": 2.311507225036621, "rewards/rejected": -2.3242032527923584, "step": 2822 }, { "epoch": 0.33, "learning_rate": 2.0546646377150882e-07, "logits/chosen": -3.299320697784424, "logits/rejected": -3.4471657276153564, "logps/chosen": -300.37921142578125, "logps/rejected": -329.3104553222656, "loss": 0.2131, "rewards/accuracies": 0.875, "rewards/chosen": -0.1285678595304489, "rewards/margins": 2.6096432209014893, "rewards/rejected": -2.738211154937744, "step": 2823 }, { "epoch": 0.33, "learning_rate": 2.0543134730188458e-07, "logits/chosen": -3.3328328132629395, "logits/rejected": -3.4418020248413086, "logps/chosen": -111.92011260986328, "logps/rejected": -243.8980712890625, "loss": 0.5381, "rewards/accuracies": 0.75, "rewards/chosen": 0.01608555018901825, "rewards/margins": 1.337178111076355, "rewards/rejected": -1.3210923671722412, "step": 2824 }, { "epoch": 0.33, "learning_rate": 2.0539623083226033e-07, "logits/chosen": -3.667369842529297, "logits/rejected": -3.9021668434143066, "logps/chosen": -160.41021728515625, "logps/rejected": -196.22976684570312, "loss": 0.2332, "rewards/accuracies": 0.875, "rewards/chosen": -0.08319275081157684, "rewards/margins": 2.984401226043701, "rewards/rejected": -3.067594051361084, "step": 2825 }, { "epoch": 0.33, "learning_rate": 2.0536111436263606e-07, "logits/chosen": -3.1877806186676025, "logits/rejected": -3.57426118850708, "logps/chosen": -132.1624298095703, "logps/rejected": -165.39495849609375, "loss": 0.2547, "rewards/accuracies": 1.0, "rewards/chosen": 0.24397379159927368, "rewards/margins": 2.3526341915130615, "rewards/rejected": -2.1086602210998535, "step": 2826 }, { "epoch": 0.33, "learning_rate": 2.053259978930118e-07, "logits/chosen": -2.2200443744659424, "logits/rejected": -2.239499807357788, "logps/chosen": -264.8586730957031, "logps/rejected": -207.8159942626953, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": -0.33564120531082153, "rewards/margins": 1.5681999921798706, "rewards/rejected": -1.903841257095337, "step": 2827 }, { "epoch": 0.33, "learning_rate": 2.0529088142338757e-07, "logits/chosen": -3.1319541931152344, "logits/rejected": -3.5582733154296875, "logps/chosen": -251.1297149658203, "logps/rejected": -203.61962890625, "loss": 0.4657, "rewards/accuracies": 0.75, "rewards/chosen": -0.5164710283279419, "rewards/margins": 1.9681981801986694, "rewards/rejected": -2.4846692085266113, "step": 2828 }, { "epoch": 0.33, "learning_rate": 2.052557649537633e-07, "logits/chosen": -2.409322738647461, "logits/rejected": -2.3061256408691406, "logps/chosen": -262.6789855957031, "logps/rejected": -265.337158203125, "loss": 0.3967, "rewards/accuracies": 0.875, "rewards/chosen": 0.13889148831367493, "rewards/margins": 1.3595165014266968, "rewards/rejected": -1.2206250429153442, "step": 2829 }, { "epoch": 0.33, "learning_rate": 2.0522064848413905e-07, "logits/chosen": -3.533191204071045, "logits/rejected": -3.625004768371582, "logps/chosen": -190.8134765625, "logps/rejected": -202.21246337890625, "loss": 0.8439, "rewards/accuracies": 0.375, "rewards/chosen": -0.18298617005348206, "rewards/margins": -0.12491105496883392, "rewards/rejected": -0.05807510018348694, "step": 2830 }, { "epoch": 0.33, "learning_rate": 2.0518553201451477e-07, "logits/chosen": -2.260848045349121, "logits/rejected": -2.3793628215789795, "logps/chosen": -226.67410278320312, "logps/rejected": -203.89141845703125, "loss": 0.7461, "rewards/accuracies": 0.625, "rewards/chosen": -0.41478753089904785, "rewards/margins": 0.7413772940635681, "rewards/rejected": -1.1561648845672607, "step": 2831 }, { "epoch": 0.33, "learning_rate": 2.0515041554489055e-07, "logits/chosen": -3.3377904891967773, "logits/rejected": -3.2721962928771973, "logps/chosen": -445.6872863769531, "logps/rejected": -260.747314453125, "loss": 0.3066, "rewards/accuracies": 1.0, "rewards/chosen": -0.37181031703948975, "rewards/margins": 1.3510496616363525, "rewards/rejected": -1.7228599786758423, "step": 2832 }, { "epoch": 0.33, "learning_rate": 2.051152990752663e-07, "logits/chosen": -3.283334970474243, "logits/rejected": -3.422372817993164, "logps/chosen": -283.694580078125, "logps/rejected": -213.81796264648438, "loss": 0.4655, "rewards/accuracies": 0.875, "rewards/chosen": 0.10639706254005432, "rewards/margins": 0.8843005895614624, "rewards/rejected": -0.7779035568237305, "step": 2833 }, { "epoch": 0.33, "learning_rate": 2.0508018260564204e-07, "logits/chosen": -2.571668863296509, "logits/rejected": -2.686441659927368, "logps/chosen": -436.0262451171875, "logps/rejected": -267.1753234863281, "loss": 0.2666, "rewards/accuracies": 1.0, "rewards/chosen": 0.09400790184736252, "rewards/margins": 1.5225231647491455, "rewards/rejected": -1.4285151958465576, "step": 2834 }, { "epoch": 0.33, "learning_rate": 2.050450661360178e-07, "logits/chosen": -3.2354795932769775, "logits/rejected": -3.2029502391815186, "logps/chosen": -245.97073364257812, "logps/rejected": -203.56268310546875, "loss": 0.3259, "rewards/accuracies": 0.875, "rewards/chosen": 0.29072803258895874, "rewards/margins": 1.2958617210388184, "rewards/rejected": -1.0051336288452148, "step": 2835 }, { "epoch": 0.33, "learning_rate": 2.0500994966639354e-07, "logits/chosen": -3.5662002563476562, "logits/rejected": -3.201505422592163, "logps/chosen": -391.55828857421875, "logps/rejected": -431.5497131347656, "loss": 0.3071, "rewards/accuracies": 0.75, "rewards/chosen": 0.27092230319976807, "rewards/margins": 2.3845252990722656, "rewards/rejected": -2.113602876663208, "step": 2836 }, { "epoch": 0.33, "learning_rate": 2.0497483319676927e-07, "logits/chosen": -3.1553406715393066, "logits/rejected": -3.542404890060425, "logps/chosen": -224.5789337158203, "logps/rejected": -195.91854858398438, "loss": 0.409, "rewards/accuracies": 0.75, "rewards/chosen": 0.13149698078632355, "rewards/margins": 1.4715056419372559, "rewards/rejected": -1.3400086164474487, "step": 2837 }, { "epoch": 0.33, "learning_rate": 2.0493971672714503e-07, "logits/chosen": -2.8095169067382812, "logits/rejected": -3.3172037601470947, "logps/chosen": -206.4317169189453, "logps/rejected": -356.79345703125, "loss": 0.1801, "rewards/accuracies": 0.875, "rewards/chosen": 0.18328090012073517, "rewards/margins": 4.173395156860352, "rewards/rejected": -3.990114212036133, "step": 2838 }, { "epoch": 0.33, "learning_rate": 2.0490460025752075e-07, "logits/chosen": -3.175493001937866, "logits/rejected": -3.241122007369995, "logps/chosen": -343.7049560546875, "logps/rejected": -304.8462829589844, "loss": 0.6903, "rewards/accuracies": 0.625, "rewards/chosen": -0.6818311214447021, "rewards/margins": 0.706986129283905, "rewards/rejected": -1.3888171911239624, "step": 2839 }, { "epoch": 0.33, "learning_rate": 2.048694837878965e-07, "logits/chosen": -3.2136454582214355, "logits/rejected": -3.106694459915161, "logps/chosen": -335.4702453613281, "logps/rejected": -334.1417541503906, "loss": 0.5216, "rewards/accuracies": 0.625, "rewards/chosen": -0.11840799450874329, "rewards/margins": 0.747544527053833, "rewards/rejected": -0.8659524917602539, "step": 2840 }, { "epoch": 0.33, "learning_rate": 2.0483436731827226e-07, "logits/chosen": -3.2005863189697266, "logits/rejected": -3.286083221435547, "logps/chosen": -253.33087158203125, "logps/rejected": -349.37103271484375, "loss": 0.2865, "rewards/accuracies": 0.75, "rewards/chosen": -0.15498188138008118, "rewards/margins": 2.362828016281128, "rewards/rejected": -2.5178098678588867, "step": 2841 }, { "epoch": 0.33, "learning_rate": 2.04799250848648e-07, "logits/chosen": -3.661076545715332, "logits/rejected": -3.7284059524536133, "logps/chosen": -98.0713119506836, "logps/rejected": -204.99378967285156, "loss": 0.3115, "rewards/accuracies": 0.875, "rewards/chosen": 0.07008009403944016, "rewards/margins": 3.4748270511627197, "rewards/rejected": -3.4047467708587646, "step": 2842 }, { "epoch": 0.33, "learning_rate": 2.0476413437902377e-07, "logits/chosen": -2.9179842472076416, "logits/rejected": -2.96488881111145, "logps/chosen": -299.5823059082031, "logps/rejected": -297.158203125, "loss": 0.6925, "rewards/accuracies": 0.625, "rewards/chosen": 0.07056504487991333, "rewards/margins": 0.4271971583366394, "rewards/rejected": -0.35663214325904846, "step": 2843 }, { "epoch": 0.33, "learning_rate": 2.0472901790939952e-07, "logits/chosen": -3.262031316757202, "logits/rejected": -3.5883002281188965, "logps/chosen": -165.2972412109375, "logps/rejected": -274.709716796875, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": 0.05183995142579079, "rewards/margins": 2.7336294651031494, "rewards/rejected": -2.6817893981933594, "step": 2844 }, { "epoch": 0.33, "learning_rate": 2.0469390143977525e-07, "logits/chosen": -3.4286673069000244, "logits/rejected": -3.506324291229248, "logps/chosen": -193.91348266601562, "logps/rejected": -258.2356262207031, "loss": 0.5467, "rewards/accuracies": 0.625, "rewards/chosen": -0.20075538754463196, "rewards/margins": 1.1406350135803223, "rewards/rejected": -1.3413903713226318, "step": 2845 }, { "epoch": 0.33, "learning_rate": 2.04658784970151e-07, "logits/chosen": -3.846790313720703, "logits/rejected": -3.740144968032837, "logps/chosen": -266.9960021972656, "logps/rejected": -275.77166748046875, "loss": 0.166, "rewards/accuracies": 1.0, "rewards/chosen": 0.36887016892433167, "rewards/margins": 2.7621688842773438, "rewards/rejected": -2.393298864364624, "step": 2846 }, { "epoch": 0.33, "learning_rate": 2.0462366850052673e-07, "logits/chosen": -3.2154715061187744, "logits/rejected": -3.0804696083068848, "logps/chosen": -156.24244689941406, "logps/rejected": -226.55938720703125, "loss": 0.3985, "rewards/accuracies": 0.875, "rewards/chosen": 0.16894464194774628, "rewards/margins": 1.3082945346832275, "rewards/rejected": -1.1393499374389648, "step": 2847 }, { "epoch": 0.33, "learning_rate": 2.0458855203090248e-07, "logits/chosen": -2.71974515914917, "logits/rejected": -2.7606868743896484, "logps/chosen": -326.88818359375, "logps/rejected": -301.45758056640625, "loss": 0.2039, "rewards/accuracies": 0.875, "rewards/chosen": 0.1638636291027069, "rewards/margins": 2.366168975830078, "rewards/rejected": -2.202305316925049, "step": 2848 }, { "epoch": 0.33, "learning_rate": 2.0455343556127824e-07, "logits/chosen": -4.187641143798828, "logits/rejected": -3.686187267303467, "logps/chosen": -306.5660400390625, "logps/rejected": -220.70330810546875, "loss": 0.5584, "rewards/accuracies": 0.75, "rewards/chosen": -0.7266150712966919, "rewards/margins": 0.987389862537384, "rewards/rejected": -1.7140049934387207, "step": 2849 }, { "epoch": 0.33, "learning_rate": 2.0451831909165397e-07, "logits/chosen": -3.0918784141540527, "logits/rejected": -3.3395795822143555, "logps/chosen": -127.67268371582031, "logps/rejected": -205.4291534423828, "loss": 0.53, "rewards/accuracies": 0.625, "rewards/chosen": -0.3469720780849457, "rewards/margins": 0.727628231048584, "rewards/rejected": -1.074600338935852, "step": 2850 }, { "epoch": 0.33, "learning_rate": 2.0448320262202972e-07, "logits/chosen": -2.266724109649658, "logits/rejected": -2.6965675354003906, "logps/chosen": -367.3207092285156, "logps/rejected": -236.07861328125, "loss": 0.2026, "rewards/accuracies": 1.0, "rewards/chosen": 0.44943588972091675, "rewards/margins": 2.4586939811706543, "rewards/rejected": -2.0092580318450928, "step": 2851 }, { "epoch": 0.33, "learning_rate": 2.0444808615240545e-07, "logits/chosen": -3.265845775604248, "logits/rejected": -3.5107994079589844, "logps/chosen": -376.89837646484375, "logps/rejected": -406.1129150390625, "loss": 0.4289, "rewards/accuracies": 0.875, "rewards/chosen": 0.08156808465719223, "rewards/margins": 2.337326765060425, "rewards/rejected": -2.255758762359619, "step": 2852 }, { "epoch": 0.33, "learning_rate": 2.044129696827812e-07, "logits/chosen": -3.413444995880127, "logits/rejected": -3.628187894821167, "logps/chosen": -233.23023986816406, "logps/rejected": -418.2298278808594, "loss": 0.5283, "rewards/accuracies": 0.625, "rewards/chosen": -0.13183501362800598, "rewards/margins": 0.9775283336639404, "rewards/rejected": -1.109363317489624, "step": 2853 }, { "epoch": 0.33, "learning_rate": 2.0437785321315698e-07, "logits/chosen": -2.5081512928009033, "logits/rejected": -2.5851986408233643, "logps/chosen": -155.041748046875, "logps/rejected": -205.93833923339844, "loss": 0.404, "rewards/accuracies": 0.75, "rewards/chosen": 0.49991554021835327, "rewards/margins": 1.5381900072097778, "rewards/rejected": -1.0382745265960693, "step": 2854 }, { "epoch": 0.33, "learning_rate": 2.043427367435327e-07, "logits/chosen": -2.7078542709350586, "logits/rejected": -2.7510008811950684, "logps/chosen": -145.48406982421875, "logps/rejected": -130.09799194335938, "loss": 0.6858, "rewards/accuracies": 0.625, "rewards/chosen": -0.5758556127548218, "rewards/margins": 0.5272294282913208, "rewards/rejected": -1.1030850410461426, "step": 2855 }, { "epoch": 0.33, "learning_rate": 2.0430762027390846e-07, "logits/chosen": -3.8277082443237305, "logits/rejected": -3.7483091354370117, "logps/chosen": -107.4218521118164, "logps/rejected": -194.98402404785156, "loss": 0.3055, "rewards/accuracies": 0.875, "rewards/chosen": 0.3449106216430664, "rewards/margins": 1.5563803911209106, "rewards/rejected": -1.2114696502685547, "step": 2856 }, { "epoch": 0.33, "learning_rate": 2.0427250380428422e-07, "logits/chosen": -3.4146618843078613, "logits/rejected": -3.019530773162842, "logps/chosen": -248.57144165039062, "logps/rejected": -315.67120361328125, "loss": 0.3784, "rewards/accuracies": 0.875, "rewards/chosen": -0.08395737409591675, "rewards/margins": 1.0278345346450806, "rewards/rejected": -1.1117918491363525, "step": 2857 }, { "epoch": 0.33, "learning_rate": 2.0423738733465994e-07, "logits/chosen": -3.355469226837158, "logits/rejected": -3.481398344039917, "logps/chosen": -287.9048156738281, "logps/rejected": -209.59620666503906, "loss": 0.631, "rewards/accuracies": 0.625, "rewards/chosen": -0.22663965821266174, "rewards/margins": 0.5752274394035339, "rewards/rejected": -0.8018671274185181, "step": 2858 }, { "epoch": 0.33, "learning_rate": 2.042022708650357e-07, "logits/chosen": -3.780696153640747, "logits/rejected": -3.8148794174194336, "logps/chosen": -218.00885009765625, "logps/rejected": -243.75308227539062, "loss": 0.4824, "rewards/accuracies": 0.625, "rewards/chosen": 0.4431857764720917, "rewards/margins": 2.1373233795166016, "rewards/rejected": -1.694137692451477, "step": 2859 }, { "epoch": 0.33, "learning_rate": 2.0416715439541142e-07, "logits/chosen": -3.4989047050476074, "logits/rejected": -3.5055723190307617, "logps/chosen": -394.12890625, "logps/rejected": -231.61492919921875, "loss": 0.4735, "rewards/accuracies": 0.625, "rewards/chosen": 0.2130986750125885, "rewards/margins": 2.1727070808410645, "rewards/rejected": -1.9596086740493774, "step": 2860 }, { "epoch": 0.33, "learning_rate": 2.0413203792578718e-07, "logits/chosen": -2.994781017303467, "logits/rejected": -2.5221192836761475, "logps/chosen": -446.52862548828125, "logps/rejected": -372.12017822265625, "loss": 0.5404, "rewards/accuracies": 0.75, "rewards/chosen": -0.052041590213775635, "rewards/margins": 1.267101764678955, "rewards/rejected": -1.3191434144973755, "step": 2861 }, { "epoch": 0.33, "learning_rate": 2.0409692145616293e-07, "logits/chosen": -3.3700790405273438, "logits/rejected": -3.4661407470703125, "logps/chosen": -143.74241638183594, "logps/rejected": -221.1109161376953, "loss": 0.2348, "rewards/accuracies": 1.0, "rewards/chosen": 0.28408685326576233, "rewards/margins": 1.802150011062622, "rewards/rejected": -1.5180631875991821, "step": 2862 }, { "epoch": 0.33, "learning_rate": 2.0406180498653866e-07, "logits/chosen": -3.284825325012207, "logits/rejected": -3.4520740509033203, "logps/chosen": -327.230224609375, "logps/rejected": -281.76812744140625, "loss": 0.2492, "rewards/accuracies": 0.875, "rewards/chosen": 0.24369294941425323, "rewards/margins": 3.1893839836120605, "rewards/rejected": -2.9456911087036133, "step": 2863 }, { "epoch": 0.33, "learning_rate": 2.0402668851691441e-07, "logits/chosen": -2.760578155517578, "logits/rejected": -2.690955877304077, "logps/chosen": -371.62115478515625, "logps/rejected": -229.04368591308594, "loss": 0.3358, "rewards/accuracies": 0.875, "rewards/chosen": 0.5389971137046814, "rewards/margins": 1.2430037260055542, "rewards/rejected": -0.7040066719055176, "step": 2864 }, { "epoch": 0.33, "learning_rate": 2.039915720472902e-07, "logits/chosen": -3.003124237060547, "logits/rejected": -2.8219361305236816, "logps/chosen": -185.51058959960938, "logps/rejected": -178.19921875, "loss": 0.5855, "rewards/accuracies": 0.75, "rewards/chosen": -0.23136897385120392, "rewards/margins": 0.6254237294197083, "rewards/rejected": -0.856792688369751, "step": 2865 }, { "epoch": 0.33, "learning_rate": 2.0395645557766592e-07, "logits/chosen": -3.105785369873047, "logits/rejected": -3.4216971397399902, "logps/chosen": -161.64126586914062, "logps/rejected": -152.860595703125, "loss": 0.6072, "rewards/accuracies": 0.875, "rewards/chosen": -0.043644893914461136, "rewards/margins": 1.243830919265747, "rewards/rejected": -1.287475824356079, "step": 2866 }, { "epoch": 0.33, "learning_rate": 2.0392133910804168e-07, "logits/chosen": -3.3430590629577637, "logits/rejected": -3.047654390335083, "logps/chosen": -443.2104187011719, "logps/rejected": -242.50274658203125, "loss": 0.3402, "rewards/accuracies": 1.0, "rewards/chosen": -0.0757419615983963, "rewards/margins": 1.3590400218963623, "rewards/rejected": -1.4347820281982422, "step": 2867 }, { "epoch": 0.33, "learning_rate": 2.038862226384174e-07, "logits/chosen": -3.5673904418945312, "logits/rejected": -3.885878086090088, "logps/chosen": -104.0085678100586, "logps/rejected": -182.9176025390625, "loss": 0.4167, "rewards/accuracies": 0.75, "rewards/chosen": -0.0320337638258934, "rewards/margins": 1.4112091064453125, "rewards/rejected": -1.4432427883148193, "step": 2868 }, { "epoch": 0.33, "learning_rate": 2.0385110616879316e-07, "logits/chosen": -2.8177428245544434, "logits/rejected": -2.9365592002868652, "logps/chosen": -236.85020446777344, "logps/rejected": -162.49288940429688, "loss": 0.2878, "rewards/accuracies": 1.0, "rewards/chosen": 0.5083247423171997, "rewards/margins": 1.562101125717163, "rewards/rejected": -1.0537763833999634, "step": 2869 }, { "epoch": 0.33, "learning_rate": 2.038159896991689e-07, "logits/chosen": -3.3119957447052, "logits/rejected": -3.5376977920532227, "logps/chosen": -80.22996520996094, "logps/rejected": -229.53500366210938, "loss": 0.2071, "rewards/accuracies": 1.0, "rewards/chosen": 0.06617847084999084, "rewards/margins": 2.495213508605957, "rewards/rejected": -2.429035186767578, "step": 2870 }, { "epoch": 0.33, "learning_rate": 2.0378087322954464e-07, "logits/chosen": -2.822551727294922, "logits/rejected": -3.1431126594543457, "logps/chosen": -201.25804138183594, "logps/rejected": -216.75155639648438, "loss": 0.4443, "rewards/accuracies": 0.75, "rewards/chosen": 0.24011985957622528, "rewards/margins": 1.0317258834838867, "rewards/rejected": -0.7916060090065002, "step": 2871 }, { "epoch": 0.33, "learning_rate": 2.037457567599204e-07, "logits/chosen": -3.9176366329193115, "logits/rejected": -3.9342238903045654, "logps/chosen": -198.0251922607422, "logps/rejected": -265.783447265625, "loss": 0.1576, "rewards/accuracies": 1.0, "rewards/chosen": -0.24891816079616547, "rewards/margins": 2.949993133544922, "rewards/rejected": -3.198911190032959, "step": 2872 }, { "epoch": 0.33, "learning_rate": 2.0371064029029615e-07, "logits/chosen": -3.441892623901367, "logits/rejected": -3.429807662963867, "logps/chosen": -155.37017822265625, "logps/rejected": -226.2512969970703, "loss": 0.3728, "rewards/accuracies": 0.75, "rewards/chosen": 0.43890780210494995, "rewards/margins": 1.76227605342865, "rewards/rejected": -1.3233681917190552, "step": 2873 }, { "epoch": 0.33, "learning_rate": 2.0367552382067187e-07, "logits/chosen": -2.4810843467712402, "logits/rejected": -2.4452452659606934, "logps/chosen": -305.5585632324219, "logps/rejected": -262.0847473144531, "loss": 0.3351, "rewards/accuracies": 0.75, "rewards/chosen": -0.4443497657775879, "rewards/margins": 2.7303013801574707, "rewards/rejected": -3.1746511459350586, "step": 2874 }, { "epoch": 0.33, "learning_rate": 2.0364040735104763e-07, "logits/chosen": -2.2279763221740723, "logits/rejected": -2.673219919204712, "logps/chosen": -185.39920043945312, "logps/rejected": -227.1730499267578, "loss": 0.4857, "rewards/accuracies": 0.625, "rewards/chosen": 0.03498871624469757, "rewards/margins": 1.4246978759765625, "rewards/rejected": -1.3897091150283813, "step": 2875 }, { "epoch": 0.33, "learning_rate": 2.0360529088142335e-07, "logits/chosen": -3.4194071292877197, "logits/rejected": -3.7679789066314697, "logps/chosen": -136.21278381347656, "logps/rejected": -273.4063720703125, "loss": 0.6594, "rewards/accuracies": 0.5, "rewards/chosen": -0.5958983302116394, "rewards/margins": 1.835218071937561, "rewards/rejected": -2.4311163425445557, "step": 2876 }, { "epoch": 0.33, "learning_rate": 2.0357017441179913e-07, "logits/chosen": -3.377349376678467, "logits/rejected": -3.5270557403564453, "logps/chosen": -147.3437957763672, "logps/rejected": -179.2998504638672, "loss": 0.5146, "rewards/accuracies": 0.875, "rewards/chosen": -0.2750656008720398, "rewards/margins": 0.9859857559204102, "rewards/rejected": -1.2610514163970947, "step": 2877 }, { "epoch": 0.33, "learning_rate": 2.035350579421749e-07, "logits/chosen": -3.3747410774230957, "logits/rejected": -3.6004037857055664, "logps/chosen": -132.59349060058594, "logps/rejected": -145.92938232421875, "loss": 0.4905, "rewards/accuracies": 0.75, "rewards/chosen": -0.16789588332176208, "rewards/margins": 0.9863411784172058, "rewards/rejected": -1.1542370319366455, "step": 2878 }, { "epoch": 0.33, "learning_rate": 2.0349994147255062e-07, "logits/chosen": -3.310739040374756, "logits/rejected": -3.25069260597229, "logps/chosen": -309.7239990234375, "logps/rejected": -194.3499298095703, "loss": 0.3466, "rewards/accuracies": 0.875, "rewards/chosen": 0.23342959582805634, "rewards/margins": 1.051843285560608, "rewards/rejected": -0.8184137344360352, "step": 2879 }, { "epoch": 0.33, "learning_rate": 2.0346482500292637e-07, "logits/chosen": -2.6922240257263184, "logits/rejected": -2.826249361038208, "logps/chosen": -399.671142578125, "logps/rejected": -294.33953857421875, "loss": 0.3997, "rewards/accuracies": 0.75, "rewards/chosen": 0.1272048056125641, "rewards/margins": 1.224534273147583, "rewards/rejected": -1.0973294973373413, "step": 2880 }, { "epoch": 0.33, "learning_rate": 2.0342970853330212e-07, "logits/chosen": -2.624650478363037, "logits/rejected": -2.749953031539917, "logps/chosen": -311.1007995605469, "logps/rejected": -484.07110595703125, "loss": 0.6448, "rewards/accuracies": 0.625, "rewards/chosen": -0.6814218163490295, "rewards/margins": 0.4598720669746399, "rewards/rejected": -1.1412938833236694, "step": 2881 }, { "epoch": 0.33, "learning_rate": 2.0339459206367785e-07, "logits/chosen": -2.7654786109924316, "logits/rejected": -2.634474992752075, "logps/chosen": -358.39947509765625, "logps/rejected": -334.8489685058594, "loss": 0.6411, "rewards/accuracies": 0.625, "rewards/chosen": 0.005508989095687866, "rewards/margins": 0.6813869476318359, "rewards/rejected": -0.6758779883384705, "step": 2882 }, { "epoch": 0.33, "learning_rate": 2.033594755940536e-07, "logits/chosen": -3.3589162826538086, "logits/rejected": -3.4439425468444824, "logps/chosen": -259.4148254394531, "logps/rejected": -226.098388671875, "loss": 0.4831, "rewards/accuracies": 0.875, "rewards/chosen": -0.10776549577713013, "rewards/margins": 1.1535016298294067, "rewards/rejected": -1.2612671852111816, "step": 2883 }, { "epoch": 0.33, "learning_rate": 2.0332435912442933e-07, "logits/chosen": -3.2560248374938965, "logits/rejected": -3.540004253387451, "logps/chosen": -106.66690063476562, "logps/rejected": -151.1693878173828, "loss": 0.3813, "rewards/accuracies": 1.0, "rewards/chosen": -0.25389766693115234, "rewards/margins": 1.2234773635864258, "rewards/rejected": -1.4773750305175781, "step": 2884 }, { "epoch": 0.33, "learning_rate": 2.0328924265480509e-07, "logits/chosen": -2.682955265045166, "logits/rejected": -2.7434847354888916, "logps/chosen": -237.13888549804688, "logps/rejected": -188.9593963623047, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": -0.18113292753696442, "rewards/margins": 1.7273635864257812, "rewards/rejected": -1.9084967374801636, "step": 2885 }, { "epoch": 0.33, "learning_rate": 2.0325412618518084e-07, "logits/chosen": -3.130871057510376, "logits/rejected": -3.096796751022339, "logps/chosen": -448.5235900878906, "logps/rejected": -366.7471923828125, "loss": 0.5405, "rewards/accuracies": 0.875, "rewards/chosen": -0.2540725767612457, "rewards/margins": 1.0548664331436157, "rewards/rejected": -1.308938980102539, "step": 2886 }, { "epoch": 0.33, "learning_rate": 2.0321900971555657e-07, "logits/chosen": -2.966341972351074, "logits/rejected": -2.6792943477630615, "logps/chosen": -404.1424560546875, "logps/rejected": -265.12969970703125, "loss": 0.628, "rewards/accuracies": 0.625, "rewards/chosen": -0.33887067437171936, "rewards/margins": 0.7187198400497437, "rewards/rejected": -1.0575904846191406, "step": 2887 }, { "epoch": 0.33, "learning_rate": 2.0318389324593235e-07, "logits/chosen": -3.5187668800354004, "logits/rejected": -3.288881301879883, "logps/chosen": -180.57705688476562, "logps/rejected": -159.48138427734375, "loss": 0.5362, "rewards/accuracies": 0.75, "rewards/chosen": -0.4802477955818176, "rewards/margins": 0.7036935091018677, "rewards/rejected": -1.1839412450790405, "step": 2888 }, { "epoch": 0.33, "learning_rate": 2.031487767763081e-07, "logits/chosen": -2.6170167922973633, "logits/rejected": -3.084103584289551, "logps/chosen": -216.09457397460938, "logps/rejected": -291.9983825683594, "loss": 0.2523, "rewards/accuracies": 0.875, "rewards/chosen": 0.1299726963043213, "rewards/margins": 2.5426528453826904, "rewards/rejected": -2.41267991065979, "step": 2889 }, { "epoch": 0.33, "learning_rate": 2.0311366030668383e-07, "logits/chosen": -3.602548599243164, "logits/rejected": -3.6066975593566895, "logps/chosen": -285.4060363769531, "logps/rejected": -203.991455078125, "loss": 0.6956, "rewards/accuracies": 0.875, "rewards/chosen": -0.5418540239334106, "rewards/margins": 1.4668782949447632, "rewards/rejected": -2.0087320804595947, "step": 2890 }, { "epoch": 0.33, "learning_rate": 2.0307854383705958e-07, "logits/chosen": -3.328801393508911, "logits/rejected": -3.5992088317871094, "logps/chosen": -222.65676879882812, "logps/rejected": -256.3575439453125, "loss": 0.2553, "rewards/accuracies": 0.875, "rewards/chosen": 0.2712659239768982, "rewards/margins": 2.6190712451934814, "rewards/rejected": -2.3478055000305176, "step": 2891 }, { "epoch": 0.33, "learning_rate": 2.030434273674353e-07, "logits/chosen": -3.5535213947296143, "logits/rejected": -3.6672935485839844, "logps/chosen": -263.6219177246094, "logps/rejected": -281.49920654296875, "loss": 0.2473, "rewards/accuracies": 1.0, "rewards/chosen": 0.16253812611103058, "rewards/margins": 1.9850643873214722, "rewards/rejected": -1.822526216506958, "step": 2892 }, { "epoch": 0.33, "learning_rate": 2.0300831089781106e-07, "logits/chosen": -2.7230935096740723, "logits/rejected": -2.833466053009033, "logps/chosen": -237.63197326660156, "logps/rejected": -339.1151123046875, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": -0.0367647185921669, "rewards/margins": 2.592855930328369, "rewards/rejected": -2.6296205520629883, "step": 2893 }, { "epoch": 0.33, "learning_rate": 2.0297319442818682e-07, "logits/chosen": -3.154160976409912, "logits/rejected": -3.2895665168762207, "logps/chosen": -169.5806884765625, "logps/rejected": -214.6156463623047, "loss": 0.5531, "rewards/accuracies": 0.625, "rewards/chosen": -0.6379812359809875, "rewards/margins": 1.3894729614257812, "rewards/rejected": -2.027454137802124, "step": 2894 }, { "epoch": 0.33, "learning_rate": 2.0293807795856254e-07, "logits/chosen": -2.7812728881835938, "logits/rejected": -2.794137477874756, "logps/chosen": -172.37005615234375, "logps/rejected": -240.3179168701172, "loss": 0.3697, "rewards/accuracies": 0.75, "rewards/chosen": -0.11813174188137054, "rewards/margins": 1.116188883781433, "rewards/rejected": -1.2343206405639648, "step": 2895 }, { "epoch": 0.33, "learning_rate": 2.029029614889383e-07, "logits/chosen": -2.7991766929626465, "logits/rejected": -2.82218599319458, "logps/chosen": -268.7731628417969, "logps/rejected": -259.3908996582031, "loss": 0.1853, "rewards/accuracies": 1.0, "rewards/chosen": 0.027704190462827682, "rewards/margins": 1.890121340751648, "rewards/rejected": -1.8624169826507568, "step": 2896 }, { "epoch": 0.33, "learning_rate": 2.0286784501931405e-07, "logits/chosen": -3.3356785774230957, "logits/rejected": -3.206099033355713, "logps/chosen": -182.0909881591797, "logps/rejected": -149.77499389648438, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": 0.7972962856292725, "rewards/margins": 1.7562203407287598, "rewards/rejected": -0.9589241147041321, "step": 2897 }, { "epoch": 0.33, "learning_rate": 2.0283272854968978e-07, "logits/chosen": -3.5309667587280273, "logits/rejected": -3.4538121223449707, "logps/chosen": -189.53086853027344, "logps/rejected": -159.86663818359375, "loss": 0.1641, "rewards/accuracies": 1.0, "rewards/chosen": 0.5535858869552612, "rewards/margins": 2.20595121383667, "rewards/rejected": -1.6523652076721191, "step": 2898 }, { "epoch": 0.33, "learning_rate": 2.0279761208006556e-07, "logits/chosen": -3.309152841567993, "logits/rejected": -3.5300586223602295, "logps/chosen": -120.97969055175781, "logps/rejected": -274.0085754394531, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -0.07035361975431442, "rewards/margins": 2.709697961807251, "rewards/rejected": -2.7800517082214355, "step": 2899 }, { "epoch": 0.33, "learning_rate": 2.027624956104413e-07, "logits/chosen": -3.1258974075317383, "logits/rejected": -2.951890230178833, "logps/chosen": -290.8453674316406, "logps/rejected": -259.3034362792969, "loss": 0.5096, "rewards/accuracies": 0.625, "rewards/chosen": -0.16470569372177124, "rewards/margins": 0.9113531112670898, "rewards/rejected": -1.0760588645935059, "step": 2900 }, { "epoch": 0.33, "learning_rate": 2.0272737914081704e-07, "logits/chosen": -3.3193087577819824, "logits/rejected": -3.1440491676330566, "logps/chosen": -223.26614379882812, "logps/rejected": -278.0645751953125, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": 0.00713968463242054, "rewards/margins": 1.2909389734268188, "rewards/rejected": -1.2837992906570435, "step": 2901 }, { "epoch": 0.33, "learning_rate": 2.026922626711928e-07, "logits/chosen": -3.4273622035980225, "logits/rejected": -3.6122689247131348, "logps/chosen": -272.2105407714844, "logps/rejected": -250.69073486328125, "loss": 0.5938, "rewards/accuracies": 0.625, "rewards/chosen": -0.3840213418006897, "rewards/margins": 1.9926505088806152, "rewards/rejected": -2.3766720294952393, "step": 2902 }, { "epoch": 0.33, "learning_rate": 2.0265714620156852e-07, "logits/chosen": -3.8571419715881348, "logits/rejected": -3.744837760925293, "logps/chosen": -90.60382080078125, "logps/rejected": -115.12057495117188, "loss": 0.4365, "rewards/accuracies": 0.75, "rewards/chosen": 0.07765614986419678, "rewards/margins": 0.9016141295433044, "rewards/rejected": -0.8239579200744629, "step": 2903 }, { "epoch": 0.33, "learning_rate": 2.0262202973194428e-07, "logits/chosen": -3.0325510501861572, "logits/rejected": -3.0662951469421387, "logps/chosen": -174.23849487304688, "logps/rejected": -239.7551727294922, "loss": 0.5201, "rewards/accuracies": 0.625, "rewards/chosen": -0.08118686825037003, "rewards/margins": 1.1486828327178955, "rewards/rejected": -1.2298697233200073, "step": 2904 }, { "epoch": 0.33, "learning_rate": 2.0258691326232e-07, "logits/chosen": -3.1456851959228516, "logits/rejected": -2.981926918029785, "logps/chosen": -417.8323974609375, "logps/rejected": -232.88133239746094, "loss": 0.2142, "rewards/accuracies": 1.0, "rewards/chosen": -0.12147698551416397, "rewards/margins": 2.208392858505249, "rewards/rejected": -2.3298697471618652, "step": 2905 }, { "epoch": 0.34, "learning_rate": 2.0255179679269576e-07, "logits/chosen": -3.0123066902160645, "logits/rejected": -3.046659469604492, "logps/chosen": -111.80816650390625, "logps/rejected": -237.41744995117188, "loss": 0.4294, "rewards/accuracies": 0.875, "rewards/chosen": 0.2278384268283844, "rewards/margins": 1.8048381805419922, "rewards/rejected": -1.5769997835159302, "step": 2906 }, { "epoch": 0.34, "learning_rate": 2.025166803230715e-07, "logits/chosen": -2.841749429702759, "logits/rejected": -2.9171857833862305, "logps/chosen": -163.79049682617188, "logps/rejected": -271.8253173828125, "loss": 0.4686, "rewards/accuracies": 0.875, "rewards/chosen": -0.5389413237571716, "rewards/margins": 1.8660411834716797, "rewards/rejected": -2.404982566833496, "step": 2907 }, { "epoch": 0.34, "learning_rate": 2.0248156385344724e-07, "logits/chosen": -3.1888015270233154, "logits/rejected": -3.3762166500091553, "logps/chosen": -308.487548828125, "logps/rejected": -282.2408142089844, "loss": 0.2995, "rewards/accuracies": 0.875, "rewards/chosen": -0.2578979730606079, "rewards/margins": 1.6379125118255615, "rewards/rejected": -1.8958104848861694, "step": 2908 }, { "epoch": 0.34, "learning_rate": 2.02446447383823e-07, "logits/chosen": -3.409684181213379, "logits/rejected": -3.088074207305908, "logps/chosen": -231.34664916992188, "logps/rejected": -263.800048828125, "loss": 0.2925, "rewards/accuracies": 1.0, "rewards/chosen": 0.09666433930397034, "rewards/margins": 1.5264886617660522, "rewards/rejected": -1.4298243522644043, "step": 2909 }, { "epoch": 0.34, "learning_rate": 2.0241133091419877e-07, "logits/chosen": -3.5555026531219482, "logits/rejected": -3.3858587741851807, "logps/chosen": -220.3841552734375, "logps/rejected": -182.65028381347656, "loss": 0.2386, "rewards/accuracies": 1.0, "rewards/chosen": -0.10781025141477585, "rewards/margins": 1.9294021129608154, "rewards/rejected": -2.037212371826172, "step": 2910 }, { "epoch": 0.34, "learning_rate": 2.023762144445745e-07, "logits/chosen": -3.437788248062134, "logits/rejected": -3.441528558731079, "logps/chosen": -246.73094177246094, "logps/rejected": -155.72535705566406, "loss": 0.7647, "rewards/accuracies": 0.75, "rewards/chosen": -0.05477520823478699, "rewards/margins": 0.8151040077209473, "rewards/rejected": -0.8698792457580566, "step": 2911 }, { "epoch": 0.34, "learning_rate": 2.0234109797495025e-07, "logits/chosen": -2.5301971435546875, "logits/rejected": -2.4919474124908447, "logps/chosen": -139.75106811523438, "logps/rejected": -274.443115234375, "loss": 0.5712, "rewards/accuracies": 0.875, "rewards/chosen": 0.05151505768299103, "rewards/margins": 0.9389746189117432, "rewards/rejected": -0.8874596357345581, "step": 2912 }, { "epoch": 0.34, "learning_rate": 2.0230598150532598e-07, "logits/chosen": -2.386244297027588, "logits/rejected": -2.549412727355957, "logps/chosen": -285.9289245605469, "logps/rejected": -249.79061889648438, "loss": 0.6758, "rewards/accuracies": 0.5, "rewards/chosen": -0.001665368676185608, "rewards/margins": 0.6432605385780334, "rewards/rejected": -0.6449258923530579, "step": 2913 }, { "epoch": 0.34, "learning_rate": 2.0227086503570174e-07, "logits/chosen": -3.2651190757751465, "logits/rejected": -3.1326780319213867, "logps/chosen": -137.72396850585938, "logps/rejected": -200.672119140625, "loss": 0.3184, "rewards/accuracies": 1.0, "rewards/chosen": -0.36109378933906555, "rewards/margins": 1.7345033884048462, "rewards/rejected": -2.0955970287323, "step": 2914 }, { "epoch": 0.34, "learning_rate": 2.022357485660775e-07, "logits/chosen": -3.6490721702575684, "logits/rejected": -3.4258580207824707, "logps/chosen": -328.4866943359375, "logps/rejected": -218.25198364257812, "loss": 0.3498, "rewards/accuracies": 0.75, "rewards/chosen": 0.14412955939769745, "rewards/margins": 2.362576723098755, "rewards/rejected": -2.218447208404541, "step": 2915 }, { "epoch": 0.34, "learning_rate": 2.0220063209645322e-07, "logits/chosen": -3.072421073913574, "logits/rejected": -3.2418439388275146, "logps/chosen": -306.8154602050781, "logps/rejected": -353.8634948730469, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": 0.8371804356575012, "rewards/margins": 3.1967976093292236, "rewards/rejected": -2.359617233276367, "step": 2916 }, { "epoch": 0.34, "learning_rate": 2.0216551562682897e-07, "logits/chosen": -3.10593318939209, "logits/rejected": -2.9724957942962646, "logps/chosen": -271.1985778808594, "logps/rejected": -103.64981079101562, "loss": 0.5302, "rewards/accuracies": 0.625, "rewards/chosen": 0.18109387159347534, "rewards/margins": 0.8321651816368103, "rewards/rejected": -0.651071310043335, "step": 2917 }, { "epoch": 0.34, "learning_rate": 2.0213039915720472e-07, "logits/chosen": -3.32285737991333, "logits/rejected": -3.1761317253112793, "logps/chosen": -497.54815673828125, "logps/rejected": -296.56829833984375, "loss": 0.6467, "rewards/accuracies": 0.5, "rewards/chosen": -0.563912570476532, "rewards/margins": 0.5004069805145264, "rewards/rejected": -1.0643194913864136, "step": 2918 }, { "epoch": 0.34, "learning_rate": 2.0209528268758045e-07, "logits/chosen": -3.2541615962982178, "logits/rejected": -3.3115899562835693, "logps/chosen": -221.63436889648438, "logps/rejected": -352.35357666015625, "loss": 0.4507, "rewards/accuracies": 0.75, "rewards/chosen": -0.08433705568313599, "rewards/margins": 0.8270418047904968, "rewards/rejected": -0.9113788604736328, "step": 2919 }, { "epoch": 0.34, "learning_rate": 2.020601662179562e-07, "logits/chosen": -3.9774343967437744, "logits/rejected": -3.86190128326416, "logps/chosen": -194.93002319335938, "logps/rejected": -212.81338500976562, "loss": 0.6828, "rewards/accuracies": 0.625, "rewards/chosen": 0.14803776144981384, "rewards/margins": 1.3772728443145752, "rewards/rejected": -1.2292351722717285, "step": 2920 }, { "epoch": 0.34, "learning_rate": 2.0202504974833193e-07, "logits/chosen": -3.4205026626586914, "logits/rejected": -3.3348300457000732, "logps/chosen": -295.4635925292969, "logps/rejected": -240.77664184570312, "loss": 0.356, "rewards/accuracies": 0.875, "rewards/chosen": -0.27936410903930664, "rewards/margins": 1.2246732711791992, "rewards/rejected": -1.5040373802185059, "step": 2921 }, { "epoch": 0.34, "learning_rate": 2.0198993327870771e-07, "logits/chosen": -3.842554807662964, "logits/rejected": -3.4636380672454834, "logps/chosen": -298.9164123535156, "logps/rejected": -276.25439453125, "loss": 0.4523, "rewards/accuracies": 0.75, "rewards/chosen": -0.43982210755348206, "rewards/margins": 1.3990267515182495, "rewards/rejected": -1.8388487100601196, "step": 2922 }, { "epoch": 0.34, "learning_rate": 2.0195481680908347e-07, "logits/chosen": -3.4936327934265137, "logits/rejected": -3.4551618099212646, "logps/chosen": -144.1200714111328, "logps/rejected": -198.9249267578125, "loss": 0.5079, "rewards/accuracies": 0.625, "rewards/chosen": -0.4545038342475891, "rewards/margins": 1.5161478519439697, "rewards/rejected": -1.9706518650054932, "step": 2923 }, { "epoch": 0.34, "learning_rate": 2.019197003394592e-07, "logits/chosen": -3.250945568084717, "logits/rejected": -3.1881096363067627, "logps/chosen": -372.4384460449219, "logps/rejected": -301.46795654296875, "loss": 0.2915, "rewards/accuracies": 1.0, "rewards/chosen": -0.15509256720542908, "rewards/margins": 1.5211576223373413, "rewards/rejected": -1.6762502193450928, "step": 2924 }, { "epoch": 0.34, "learning_rate": 2.0188458386983495e-07, "logits/chosen": -2.9653918743133545, "logits/rejected": -3.219026565551758, "logps/chosen": -314.8522644042969, "logps/rejected": -172.25094604492188, "loss": 0.7247, "rewards/accuracies": 0.625, "rewards/chosen": -0.28413155674934387, "rewards/margins": 0.38722193241119385, "rewards/rejected": -0.6713534593582153, "step": 2925 }, { "epoch": 0.34, "learning_rate": 2.018494674002107e-07, "logits/chosen": -3.0218026638031006, "logits/rejected": -3.0762972831726074, "logps/chosen": -278.5806579589844, "logps/rejected": -171.37179565429688, "loss": 0.6272, "rewards/accuracies": 0.75, "rewards/chosen": -0.6304113864898682, "rewards/margins": 0.28319215774536133, "rewards/rejected": -0.9136036038398743, "step": 2926 }, { "epoch": 0.34, "learning_rate": 2.0181435093058643e-07, "logits/chosen": -3.6375489234924316, "logits/rejected": -3.9481396675109863, "logps/chosen": -114.65579223632812, "logps/rejected": -227.4328155517578, "loss": 0.3098, "rewards/accuracies": 0.875, "rewards/chosen": -0.36168745160102844, "rewards/margins": 2.165419340133667, "rewards/rejected": -2.527106761932373, "step": 2927 }, { "epoch": 0.34, "learning_rate": 2.0177923446096218e-07, "logits/chosen": -2.845045804977417, "logits/rejected": -3.15944504737854, "logps/chosen": -194.98345947265625, "logps/rejected": -196.55674743652344, "loss": 0.3986, "rewards/accuracies": 0.875, "rewards/chosen": 0.16046234965324402, "rewards/margins": 2.023402690887451, "rewards/rejected": -1.8629405498504639, "step": 2928 }, { "epoch": 0.34, "learning_rate": 2.017441179913379e-07, "logits/chosen": -2.2045390605926514, "logits/rejected": -2.417449951171875, "logps/chosen": -433.2235412597656, "logps/rejected": -398.8790283203125, "loss": 0.6651, "rewards/accuracies": 0.625, "rewards/chosen": 0.19060079753398895, "rewards/margins": 0.44331440329551697, "rewards/rejected": -0.2527135908603668, "step": 2929 }, { "epoch": 0.34, "learning_rate": 2.0170900152171367e-07, "logits/chosen": -2.8293135166168213, "logits/rejected": -2.6707863807678223, "logps/chosen": -434.1829833984375, "logps/rejected": -325.0392150878906, "loss": 0.2472, "rewards/accuracies": 0.875, "rewards/chosen": -0.11470673978328705, "rewards/margins": 2.8220791816711426, "rewards/rejected": -2.9367856979370117, "step": 2930 }, { "epoch": 0.34, "learning_rate": 2.0167388505208942e-07, "logits/chosen": -3.193936347961426, "logits/rejected": -3.1344683170318604, "logps/chosen": -361.7147521972656, "logps/rejected": -324.0749816894531, "loss": 0.2182, "rewards/accuracies": 1.0, "rewards/chosen": 0.020264655351638794, "rewards/margins": 1.947543740272522, "rewards/rejected": -1.927278995513916, "step": 2931 }, { "epoch": 0.34, "learning_rate": 2.0163876858246515e-07, "logits/chosen": -3.257889747619629, "logits/rejected": -3.6183388233184814, "logps/chosen": -107.74793243408203, "logps/rejected": -217.03079223632812, "loss": 0.455, "rewards/accuracies": 0.75, "rewards/chosen": -0.46542614698410034, "rewards/margins": 1.4531677961349487, "rewards/rejected": -1.9185938835144043, "step": 2932 }, { "epoch": 0.34, "learning_rate": 2.0160365211284093e-07, "logits/chosen": -3.3156652450561523, "logits/rejected": -3.4656808376312256, "logps/chosen": -233.94534301757812, "logps/rejected": -146.61886596679688, "loss": 0.5259, "rewards/accuracies": 0.75, "rewards/chosen": -0.38281339406967163, "rewards/margins": 0.6853949427604675, "rewards/rejected": -1.0682082176208496, "step": 2933 }, { "epoch": 0.34, "learning_rate": 2.0156853564321668e-07, "logits/chosen": -3.140315532684326, "logits/rejected": -3.1714439392089844, "logps/chosen": -209.2906036376953, "logps/rejected": -198.14663696289062, "loss": 0.2815, "rewards/accuracies": 1.0, "rewards/chosen": 0.6778032183647156, "rewards/margins": 1.5302822589874268, "rewards/rejected": -0.8524790406227112, "step": 2934 }, { "epoch": 0.34, "learning_rate": 2.015334191735924e-07, "logits/chosen": -2.9553558826446533, "logits/rejected": -2.9042861461639404, "logps/chosen": -132.9278564453125, "logps/rejected": -103.35874938964844, "loss": 0.7497, "rewards/accuracies": 0.625, "rewards/chosen": -0.03138389438390732, "rewards/margins": 0.488581120967865, "rewards/rejected": -0.5199650526046753, "step": 2935 }, { "epoch": 0.34, "learning_rate": 2.0149830270396816e-07, "logits/chosen": -3.115084648132324, "logits/rejected": -2.6558589935302734, "logps/chosen": -267.0437316894531, "logps/rejected": -221.674560546875, "loss": 0.4587, "rewards/accuracies": 0.875, "rewards/chosen": 0.1873985081911087, "rewards/margins": 0.8836804032325745, "rewards/rejected": -0.696281909942627, "step": 2936 }, { "epoch": 0.34, "learning_rate": 2.014631862343439e-07, "logits/chosen": -3.6395370960235596, "logits/rejected": -3.6731014251708984, "logps/chosen": -194.2936553955078, "logps/rejected": -232.82534790039062, "loss": 1.0017, "rewards/accuracies": 0.25, "rewards/chosen": -0.04591737687587738, "rewards/margins": 0.19130980968475342, "rewards/rejected": -0.237227201461792, "step": 2937 }, { "epoch": 0.34, "learning_rate": 2.0142806976471964e-07, "logits/chosen": -3.3354387283325195, "logits/rejected": -3.511521339416504, "logps/chosen": -222.4091339111328, "logps/rejected": -269.0169982910156, "loss": 0.3779, "rewards/accuracies": 0.75, "rewards/chosen": -0.044893428683280945, "rewards/margins": 1.6682689189910889, "rewards/rejected": -1.7131624221801758, "step": 2938 }, { "epoch": 0.34, "learning_rate": 2.013929532950954e-07, "logits/chosen": -2.6988158226013184, "logits/rejected": -2.481468677520752, "logps/chosen": -165.83949279785156, "logps/rejected": -208.2683868408203, "loss": 0.3038, "rewards/accuracies": 1.0, "rewards/chosen": -0.09846772253513336, "rewards/margins": 1.9758474826812744, "rewards/rejected": -2.074315071105957, "step": 2939 }, { "epoch": 0.34, "learning_rate": 2.0135783682547112e-07, "logits/chosen": -3.2513375282287598, "logits/rejected": -3.3446426391601562, "logps/chosen": -185.20703125, "logps/rejected": -226.14825439453125, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": 0.33747759461402893, "rewards/margins": 2.4196553230285645, "rewards/rejected": -2.0821776390075684, "step": 2940 }, { "epoch": 0.34, "learning_rate": 2.0132272035584688e-07, "logits/chosen": -3.3286094665527344, "logits/rejected": -3.4731173515319824, "logps/chosen": -111.169921875, "logps/rejected": -226.41802978515625, "loss": 0.3047, "rewards/accuracies": 0.875, "rewards/chosen": -0.3943968415260315, "rewards/margins": 3.1738436222076416, "rewards/rejected": -3.5682406425476074, "step": 2941 }, { "epoch": 0.34, "learning_rate": 2.0128760388622266e-07, "logits/chosen": -3.021522283554077, "logits/rejected": -3.2482199668884277, "logps/chosen": -90.61788177490234, "logps/rejected": -223.24044799804688, "loss": 0.1685, "rewards/accuracies": 0.875, "rewards/chosen": 0.2776203751564026, "rewards/margins": 3.1941661834716797, "rewards/rejected": -2.916545867919922, "step": 2942 }, { "epoch": 0.34, "learning_rate": 2.0125248741659836e-07, "logits/chosen": -3.879267930984497, "logits/rejected": -3.4690473079681396, "logps/chosen": -345.91107177734375, "logps/rejected": -193.40048217773438, "loss": 0.3728, "rewards/accuracies": 1.0, "rewards/chosen": 0.0711236298084259, "rewards/margins": 1.0579986572265625, "rewards/rejected": -0.986875057220459, "step": 2943 }, { "epoch": 0.34, "learning_rate": 2.0121737094697414e-07, "logits/chosen": -3.5829286575317383, "logits/rejected": -3.644287586212158, "logps/chosen": -79.34429931640625, "logps/rejected": -163.7261199951172, "loss": 0.2724, "rewards/accuracies": 1.0, "rewards/chosen": 0.3883410096168518, "rewards/margins": 2.1744675636291504, "rewards/rejected": -1.7861266136169434, "step": 2944 }, { "epoch": 0.34, "learning_rate": 2.0118225447734987e-07, "logits/chosen": -3.2257795333862305, "logits/rejected": -3.5706822872161865, "logps/chosen": -86.93380737304688, "logps/rejected": -153.52951049804688, "loss": 0.6394, "rewards/accuracies": 0.5, "rewards/chosen": 0.020923465490341187, "rewards/margins": 0.8440307378768921, "rewards/rejected": -0.8231073021888733, "step": 2945 }, { "epoch": 0.34, "learning_rate": 2.0114713800772562e-07, "logits/chosen": -3.0329060554504395, "logits/rejected": -2.8970446586608887, "logps/chosen": -260.33770751953125, "logps/rejected": -193.12661743164062, "loss": 0.4413, "rewards/accuracies": 1.0, "rewards/chosen": 0.0754384770989418, "rewards/margins": 0.7200766801834106, "rewards/rejected": -0.644638180732727, "step": 2946 }, { "epoch": 0.34, "learning_rate": 2.0111202153810137e-07, "logits/chosen": -2.883592367172241, "logits/rejected": -2.868070602416992, "logps/chosen": -205.02223205566406, "logps/rejected": -204.54791259765625, "loss": 0.5112, "rewards/accuracies": 0.875, "rewards/chosen": -0.2792278826236725, "rewards/margins": 0.8101165294647217, "rewards/rejected": -1.0893443822860718, "step": 2947 }, { "epoch": 0.34, "learning_rate": 2.010769050684771e-07, "logits/chosen": -2.8097195625305176, "logits/rejected": -3.152843952178955, "logps/chosen": -139.98876953125, "logps/rejected": -241.2833251953125, "loss": 0.3774, "rewards/accuracies": 0.875, "rewards/chosen": -0.027085930109024048, "rewards/margins": 2.2503609657287598, "rewards/rejected": -2.277446746826172, "step": 2948 }, { "epoch": 0.34, "learning_rate": 2.0104178859885286e-07, "logits/chosen": -3.2890512943267822, "logits/rejected": -3.800539970397949, "logps/chosen": -122.72566986083984, "logps/rejected": -220.52108764648438, "loss": 0.3549, "rewards/accuracies": 0.875, "rewards/chosen": 0.293498158454895, "rewards/margins": 2.4187824726104736, "rewards/rejected": -2.125284194946289, "step": 2949 }, { "epoch": 0.34, "learning_rate": 2.0100667212922858e-07, "logits/chosen": -2.557565689086914, "logits/rejected": -2.4764654636383057, "logps/chosen": -384.3209228515625, "logps/rejected": -367.75927734375, "loss": 0.1625, "rewards/accuracies": 1.0, "rewards/chosen": -0.3155340850353241, "rewards/margins": 2.6300878524780273, "rewards/rejected": -2.945621967315674, "step": 2950 }, { "epoch": 0.34, "learning_rate": 2.0097155565960434e-07, "logits/chosen": -2.464866876602173, "logits/rejected": -2.6863346099853516, "logps/chosen": -482.4538879394531, "logps/rejected": -279.73681640625, "loss": 0.174, "rewards/accuracies": 1.0, "rewards/chosen": 0.1314268261194229, "rewards/margins": 2.312380313873291, "rewards/rejected": -2.1809535026550293, "step": 2951 }, { "epoch": 0.34, "learning_rate": 2.009364391899801e-07, "logits/chosen": -2.910322666168213, "logits/rejected": -3.2480764389038086, "logps/chosen": -247.26763916015625, "logps/rejected": -376.4541015625, "loss": 0.2103, "rewards/accuracies": 0.875, "rewards/chosen": 0.5165860652923584, "rewards/margins": 3.5483169555664062, "rewards/rejected": -3.031731128692627, "step": 2952 }, { "epoch": 0.34, "learning_rate": 2.0090132272035582e-07, "logits/chosen": -3.5618886947631836, "logits/rejected": -3.307736396789551, "logps/chosen": -283.1102600097656, "logps/rejected": -222.1205596923828, "loss": 0.6479, "rewards/accuracies": 0.625, "rewards/chosen": -0.3213953971862793, "rewards/margins": 1.54739511013031, "rewards/rejected": -1.868790626525879, "step": 2953 }, { "epoch": 0.34, "learning_rate": 2.0086620625073157e-07, "logits/chosen": -3.276106834411621, "logits/rejected": -3.2900655269622803, "logps/chosen": -274.07421875, "logps/rejected": -205.03900146484375, "loss": 0.3969, "rewards/accuracies": 0.875, "rewards/chosen": -0.10438403487205505, "rewards/margins": 1.4987530708312988, "rewards/rejected": -1.6031370162963867, "step": 2954 }, { "epoch": 0.34, "learning_rate": 2.0083108978110735e-07, "logits/chosen": -3.2050347328186035, "logits/rejected": -3.2259111404418945, "logps/chosen": -212.81309509277344, "logps/rejected": -217.99159240722656, "loss": 0.4604, "rewards/accuracies": 0.75, "rewards/chosen": -0.21151524782180786, "rewards/margins": 0.7038230895996094, "rewards/rejected": -0.915338397026062, "step": 2955 }, { "epoch": 0.34, "learning_rate": 2.0079597331148308e-07, "logits/chosen": -3.0979113578796387, "logits/rejected": -3.2714247703552246, "logps/chosen": -166.32839965820312, "logps/rejected": -222.6524200439453, "loss": 0.2415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1351582109928131, "rewards/margins": 2.383890151977539, "rewards/rejected": -2.5190484523773193, "step": 2956 }, { "epoch": 0.34, "learning_rate": 2.0076085684185883e-07, "logits/chosen": -3.4065027236938477, "logits/rejected": -3.238586187362671, "logps/chosen": -320.4075012207031, "logps/rejected": -179.87060546875, "loss": 0.8869, "rewards/accuracies": 0.5, "rewards/chosen": -0.9620587825775146, "rewards/margins": 0.7493278384208679, "rewards/rejected": -1.711386799812317, "step": 2957 }, { "epoch": 0.34, "learning_rate": 2.0072574037223456e-07, "logits/chosen": -3.345984935760498, "logits/rejected": -3.1638994216918945, "logps/chosen": -254.9220733642578, "logps/rejected": -217.00027465820312, "loss": 0.2314, "rewards/accuracies": 1.0, "rewards/chosen": -0.01831601932644844, "rewards/margins": 1.9321069717407227, "rewards/rejected": -1.9504231214523315, "step": 2958 }, { "epoch": 0.34, "learning_rate": 2.0069062390261032e-07, "logits/chosen": -3.2762582302093506, "logits/rejected": -3.6065890789031982, "logps/chosen": -201.4008026123047, "logps/rejected": -246.94248962402344, "loss": 0.3326, "rewards/accuracies": 0.75, "rewards/chosen": 0.13006329536437988, "rewards/margins": 2.4494426250457764, "rewards/rejected": -2.3193793296813965, "step": 2959 }, { "epoch": 0.34, "learning_rate": 2.0065550743298607e-07, "logits/chosen": -3.7167484760284424, "logits/rejected": -3.4912686347961426, "logps/chosen": -225.8983917236328, "logps/rejected": -260.1999816894531, "loss": 0.5, "rewards/accuracies": 0.625, "rewards/chosen": -0.5673426985740662, "rewards/margins": 2.505617618560791, "rewards/rejected": -3.0729598999023438, "step": 2960 }, { "epoch": 0.34, "learning_rate": 2.006203909633618e-07, "logits/chosen": -2.3851637840270996, "logits/rejected": -2.2815890312194824, "logps/chosen": -324.37042236328125, "logps/rejected": -377.2200012207031, "loss": 0.4755, "rewards/accuracies": 0.75, "rewards/chosen": -0.5461332201957703, "rewards/margins": 0.6332523822784424, "rewards/rejected": -1.1793855428695679, "step": 2961 }, { "epoch": 0.34, "learning_rate": 2.0058527449373755e-07, "logits/chosen": -3.7728652954101562, "logits/rejected": -3.8157835006713867, "logps/chosen": -225.0650634765625, "logps/rejected": -191.16131591796875, "loss": 0.5564, "rewards/accuracies": 0.75, "rewards/chosen": -0.32488149404525757, "rewards/margins": 1.783473253250122, "rewards/rejected": -2.1083548069000244, "step": 2962 }, { "epoch": 0.34, "learning_rate": 2.005501580241133e-07, "logits/chosen": -3.6742606163024902, "logits/rejected": -3.884150266647339, "logps/chosen": -190.41400146484375, "logps/rejected": -343.18048095703125, "loss": 0.2513, "rewards/accuracies": 1.0, "rewards/chosen": 0.7148770093917847, "rewards/margins": 2.357071876525879, "rewards/rejected": -1.6421949863433838, "step": 2963 }, { "epoch": 0.34, "learning_rate": 2.0051504155448903e-07, "logits/chosen": -2.3819708824157715, "logits/rejected": -2.454144239425659, "logps/chosen": -463.3072204589844, "logps/rejected": -444.0306701660156, "loss": 0.414, "rewards/accuracies": 0.625, "rewards/chosen": 0.7822462320327759, "rewards/margins": 1.340709924697876, "rewards/rejected": -0.5584636926651001, "step": 2964 }, { "epoch": 0.34, "learning_rate": 2.0047992508486479e-07, "logits/chosen": -3.45698618888855, "logits/rejected": -3.4458649158477783, "logps/chosen": -177.67025756835938, "logps/rejected": -293.7791442871094, "loss": 0.31, "rewards/accuracies": 0.75, "rewards/chosen": -0.3144713342189789, "rewards/margins": 1.9426510334014893, "rewards/rejected": -2.25712251663208, "step": 2965 }, { "epoch": 0.34, "learning_rate": 2.004448086152405e-07, "logits/chosen": -3.2309508323669434, "logits/rejected": -3.1886708736419678, "logps/chosen": -285.8022155761719, "logps/rejected": -278.4993896484375, "loss": 0.4845, "rewards/accuracies": 0.75, "rewards/chosen": -0.021058499813079834, "rewards/margins": 0.8736082315444946, "rewards/rejected": -0.8946667909622192, "step": 2966 }, { "epoch": 0.34, "learning_rate": 2.004096921456163e-07, "logits/chosen": -2.5895845890045166, "logits/rejected": -2.450655937194824, "logps/chosen": -323.84771728515625, "logps/rejected": -358.38043212890625, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": 0.4293551445007324, "rewards/margins": 1.597538709640503, "rewards/rejected": -1.168183445930481, "step": 2967 }, { "epoch": 0.34, "learning_rate": 2.0037457567599205e-07, "logits/chosen": -3.168048858642578, "logits/rejected": -2.779696464538574, "logps/chosen": -429.7923278808594, "logps/rejected": -248.91915893554688, "loss": 0.6578, "rewards/accuracies": 0.5, "rewards/chosen": -0.03198336809873581, "rewards/margins": 0.9688706398010254, "rewards/rejected": -1.0008540153503418, "step": 2968 }, { "epoch": 0.34, "learning_rate": 2.0033945920636777e-07, "logits/chosen": -3.5022358894348145, "logits/rejected": -3.593702793121338, "logps/chosen": -232.06930541992188, "logps/rejected": -204.57720947265625, "loss": 0.5591, "rewards/accuracies": 0.75, "rewards/chosen": -0.03298419713973999, "rewards/margins": 1.623997449874878, "rewards/rejected": -1.6569815874099731, "step": 2969 }, { "epoch": 0.34, "learning_rate": 2.0030434273674353e-07, "logits/chosen": -2.515444755554199, "logits/rejected": -2.611987829208374, "logps/chosen": -437.18853759765625, "logps/rejected": -484.5002136230469, "loss": 0.6607, "rewards/accuracies": 0.625, "rewards/chosen": 0.20678915083408356, "rewards/margins": 0.6823543310165405, "rewards/rejected": -0.4755651652812958, "step": 2970 }, { "epoch": 0.34, "learning_rate": 2.0026922626711928e-07, "logits/chosen": -3.229118824005127, "logits/rejected": -2.9941413402557373, "logps/chosen": -257.7070007324219, "logps/rejected": -258.85931396484375, "loss": 0.4121, "rewards/accuracies": 0.875, "rewards/chosen": -0.009428739547729492, "rewards/margins": 1.835546612739563, "rewards/rejected": -1.8449753522872925, "step": 2971 }, { "epoch": 0.34, "learning_rate": 2.00234109797495e-07, "logits/chosen": -2.910989999771118, "logits/rejected": -2.967456340789795, "logps/chosen": -362.3610534667969, "logps/rejected": -354.69659423828125, "loss": 0.2608, "rewards/accuracies": 0.875, "rewards/chosen": 0.19265328347682953, "rewards/margins": 2.3764946460723877, "rewards/rejected": -2.1838412284851074, "step": 2972 }, { "epoch": 0.34, "learning_rate": 2.0019899332787076e-07, "logits/chosen": -3.767831325531006, "logits/rejected": -3.752877712249756, "logps/chosen": -297.3468017578125, "logps/rejected": -234.800537109375, "loss": 0.3336, "rewards/accuracies": 0.75, "rewards/chosen": -0.04232408106327057, "rewards/margins": 2.679588794708252, "rewards/rejected": -2.7219130992889404, "step": 2973 }, { "epoch": 0.34, "learning_rate": 2.001638768582465e-07, "logits/chosen": -2.808736801147461, "logits/rejected": -2.622533082962036, "logps/chosen": -447.11077880859375, "logps/rejected": -370.8930358886719, "loss": 0.6322, "rewards/accuracies": 0.75, "rewards/chosen": -0.3884313404560089, "rewards/margins": 0.7366093397140503, "rewards/rejected": -1.1250405311584473, "step": 2974 }, { "epoch": 0.34, "learning_rate": 2.0012876038862224e-07, "logits/chosen": -2.6631946563720703, "logits/rejected": -2.606398344039917, "logps/chosen": -444.72528076171875, "logps/rejected": -342.0912170410156, "loss": 0.2375, "rewards/accuracies": 0.875, "rewards/chosen": 0.08098623156547546, "rewards/margins": 1.9411709308624268, "rewards/rejected": -1.860184669494629, "step": 2975 }, { "epoch": 0.34, "learning_rate": 2.0009364391899802e-07, "logits/chosen": -2.4911019802093506, "logits/rejected": -2.389383554458618, "logps/chosen": -409.98248291015625, "logps/rejected": -230.0225067138672, "loss": 0.3638, "rewards/accuracies": 0.875, "rewards/chosen": -0.04305548965930939, "rewards/margins": 1.7912445068359375, "rewards/rejected": -1.8343000411987305, "step": 2976 }, { "epoch": 0.34, "learning_rate": 2.0005852744937373e-07, "logits/chosen": -3.051130533218384, "logits/rejected": -3.1382083892822266, "logps/chosen": -285.3281555175781, "logps/rejected": -179.24974060058594, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": -0.7016482949256897, "rewards/margins": 1.540708303451538, "rewards/rejected": -2.242356538772583, "step": 2977 }, { "epoch": 0.34, "learning_rate": 2.000234109797495e-07, "logits/chosen": -3.412571668624878, "logits/rejected": -2.9769859313964844, "logps/chosen": -272.42523193359375, "logps/rejected": -270.3815612792969, "loss": 0.263, "rewards/accuracies": 0.875, "rewards/chosen": 0.07033814489841461, "rewards/margins": 2.4317941665649414, "rewards/rejected": -2.3614561557769775, "step": 2978 }, { "epoch": 0.34, "learning_rate": 1.9998829451012526e-07, "logits/chosen": -3.3354339599609375, "logits/rejected": -3.3684403896331787, "logps/chosen": -213.25857543945312, "logps/rejected": -192.07913208007812, "loss": 0.4038, "rewards/accuracies": 0.75, "rewards/chosen": 0.35450178384780884, "rewards/margins": 1.3645257949829102, "rewards/rejected": -1.010023832321167, "step": 2979 }, { "epoch": 0.34, "learning_rate": 1.99953178040501e-07, "logits/chosen": -2.2966232299804688, "logits/rejected": -2.5412094593048096, "logps/chosen": -148.21197509765625, "logps/rejected": -104.93280029296875, "loss": 0.4159, "rewards/accuracies": 0.875, "rewards/chosen": -0.03637028485536575, "rewards/margins": 0.8655422329902649, "rewards/rejected": -0.9019125699996948, "step": 2980 }, { "epoch": 0.34, "learning_rate": 1.9991806157087674e-07, "logits/chosen": -2.4457528591156006, "logits/rejected": -2.567152500152588, "logps/chosen": -334.0098571777344, "logps/rejected": -285.9653625488281, "loss": 0.4737, "rewards/accuracies": 0.875, "rewards/chosen": 0.04161853343248367, "rewards/margins": 1.2551064491271973, "rewards/rejected": -1.2134878635406494, "step": 2981 }, { "epoch": 0.34, "learning_rate": 1.9988294510125247e-07, "logits/chosen": -2.269549608230591, "logits/rejected": -2.194660186767578, "logps/chosen": -351.0200500488281, "logps/rejected": -302.31170654296875, "loss": 0.2038, "rewards/accuracies": 1.0, "rewards/chosen": 0.7515136003494263, "rewards/margins": 1.7577385902404785, "rewards/rejected": -1.0062249898910522, "step": 2982 }, { "epoch": 0.34, "learning_rate": 1.9984782863162822e-07, "logits/chosen": -2.849526882171631, "logits/rejected": -2.9044618606567383, "logps/chosen": -142.3116912841797, "logps/rejected": -178.07489013671875, "loss": 0.504, "rewards/accuracies": 0.875, "rewards/chosen": -0.3033895194530487, "rewards/margins": 1.4080030918121338, "rewards/rejected": -1.7113924026489258, "step": 2983 }, { "epoch": 0.34, "learning_rate": 1.9981271216200398e-07, "logits/chosen": -2.6064300537109375, "logits/rejected": -2.7185001373291016, "logps/chosen": -223.63357543945312, "logps/rejected": -320.416748046875, "loss": 0.4903, "rewards/accuracies": 0.75, "rewards/chosen": -0.35416316986083984, "rewards/margins": 1.364677906036377, "rewards/rejected": -1.7188410758972168, "step": 2984 }, { "epoch": 0.34, "learning_rate": 1.997775956923797e-07, "logits/chosen": -3.716830253601074, "logits/rejected": -3.910639524459839, "logps/chosen": -256.9998779296875, "logps/rejected": -315.8363952636719, "loss": 0.3436, "rewards/accuracies": 0.75, "rewards/chosen": 0.5190137028694153, "rewards/margins": 2.1939942836761475, "rewards/rejected": -1.6749805212020874, "step": 2985 }, { "epoch": 0.34, "learning_rate": 1.9974247922275546e-07, "logits/chosen": -2.975961685180664, "logits/rejected": -3.188007354736328, "logps/chosen": -289.7637023925781, "logps/rejected": -246.95179748535156, "loss": 0.2917, "rewards/accuracies": 0.875, "rewards/chosen": 0.29284995794296265, "rewards/margins": 1.8309082984924316, "rewards/rejected": -1.5380582809448242, "step": 2986 }, { "epoch": 0.34, "learning_rate": 1.9970736275313124e-07, "logits/chosen": -3.8261470794677734, "logits/rejected": -3.8902313709259033, "logps/chosen": -365.32080078125, "logps/rejected": -369.98876953125, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": 0.1501438319683075, "rewards/margins": 2.6673123836517334, "rewards/rejected": -2.5171687602996826, "step": 2987 }, { "epoch": 0.34, "learning_rate": 1.9967224628350694e-07, "logits/chosen": -2.8611676692962646, "logits/rejected": -2.8606300354003906, "logps/chosen": -201.0721435546875, "logps/rejected": -329.27618408203125, "loss": 0.4768, "rewards/accuracies": 0.75, "rewards/chosen": -0.3275659680366516, "rewards/margins": 0.7490256428718567, "rewards/rejected": -1.0765914916992188, "step": 2988 }, { "epoch": 0.34, "learning_rate": 1.9963712981388272e-07, "logits/chosen": -3.416454792022705, "logits/rejected": -3.345167398452759, "logps/chosen": -69.67001342773438, "logps/rejected": -111.84070587158203, "loss": 0.4732, "rewards/accuracies": 0.625, "rewards/chosen": -0.07749253511428833, "rewards/margins": 1.2552928924560547, "rewards/rejected": -1.3327854871749878, "step": 2989 }, { "epoch": 0.34, "learning_rate": 1.9960201334425845e-07, "logits/chosen": -3.3993122577667236, "logits/rejected": -3.369974136352539, "logps/chosen": -197.49288940429688, "logps/rejected": -242.44815063476562, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": 0.40783071517944336, "rewards/margins": 3.1466867923736572, "rewards/rejected": -2.738856077194214, "step": 2990 }, { "epoch": 0.34, "learning_rate": 1.995668968746342e-07, "logits/chosen": -3.722980499267578, "logits/rejected": -3.53766131401062, "logps/chosen": -242.56549072265625, "logps/rejected": -155.88417053222656, "loss": 0.4759, "rewards/accuracies": 0.625, "rewards/chosen": -0.18551580607891083, "rewards/margins": 1.6566330194473267, "rewards/rejected": -1.842148780822754, "step": 2991 }, { "epoch": 0.34, "learning_rate": 1.9953178040500995e-07, "logits/chosen": -3.0331616401672363, "logits/rejected": -2.9508485794067383, "logps/chosen": -301.06365966796875, "logps/rejected": -183.7648468017578, "loss": 0.5323, "rewards/accuracies": 0.625, "rewards/chosen": -0.21685844659805298, "rewards/margins": 2.0872159004211426, "rewards/rejected": -2.304074287414551, "step": 2992 }, { "epoch": 0.35, "learning_rate": 1.9949666393538568e-07, "logits/chosen": -2.5392539501190186, "logits/rejected": -2.625774621963501, "logps/chosen": -325.6302185058594, "logps/rejected": -347.7114562988281, "loss": 0.2214, "rewards/accuracies": 1.0, "rewards/chosen": 0.05564439296722412, "rewards/margins": 1.881001353263855, "rewards/rejected": -1.8253567218780518, "step": 2993 }, { "epoch": 0.35, "learning_rate": 1.9946154746576144e-07, "logits/chosen": -3.414804458618164, "logits/rejected": -3.583747386932373, "logps/chosen": -298.76922607421875, "logps/rejected": -261.234619140625, "loss": 0.4232, "rewards/accuracies": 0.75, "rewards/chosen": -0.590377926826477, "rewards/margins": 1.2065370082855225, "rewards/rejected": -1.7969149351119995, "step": 2994 }, { "epoch": 0.35, "learning_rate": 1.9942643099613716e-07, "logits/chosen": -3.7196402549743652, "logits/rejected": -3.6113600730895996, "logps/chosen": -237.99790954589844, "logps/rejected": -267.6648254394531, "loss": 0.3164, "rewards/accuracies": 0.875, "rewards/chosen": -0.2398047149181366, "rewards/margins": 1.9074733257293701, "rewards/rejected": -2.14727783203125, "step": 2995 }, { "epoch": 0.35, "learning_rate": 1.9939131452651292e-07, "logits/chosen": -3.3375420570373535, "logits/rejected": -3.3888843059539795, "logps/chosen": -276.5927429199219, "logps/rejected": -267.129150390625, "loss": 0.1549, "rewards/accuracies": 1.0, "rewards/chosen": 0.2998148202896118, "rewards/margins": 2.171598434448242, "rewards/rejected": -1.8717834949493408, "step": 2996 }, { "epoch": 0.35, "learning_rate": 1.9935619805688867e-07, "logits/chosen": -2.497130870819092, "logits/rejected": -2.3048453330993652, "logps/chosen": -380.0819091796875, "logps/rejected": -299.1247863769531, "loss": 0.2903, "rewards/accuracies": 1.0, "rewards/chosen": -0.29808443784713745, "rewards/margins": 1.9018582105636597, "rewards/rejected": -2.1999425888061523, "step": 2997 }, { "epoch": 0.35, "learning_rate": 1.993210815872644e-07, "logits/chosen": -3.046635150909424, "logits/rejected": -2.855534076690674, "logps/chosen": -310.72760009765625, "logps/rejected": -285.21832275390625, "loss": 0.5567, "rewards/accuracies": 0.875, "rewards/chosen": 0.13025318086147308, "rewards/margins": 0.7707851529121399, "rewards/rejected": -0.6405320763587952, "step": 2998 }, { "epoch": 0.35, "learning_rate": 1.9928596511764015e-07, "logits/chosen": -3.0353543758392334, "logits/rejected": -3.0603725910186768, "logps/chosen": -209.21337890625, "logps/rejected": -329.19598388671875, "loss": 0.3971, "rewards/accuracies": 0.75, "rewards/chosen": -0.1573580801486969, "rewards/margins": 1.5026507377624512, "rewards/rejected": -1.6600087881088257, "step": 2999 }, { "epoch": 0.35, "learning_rate": 1.9925084864801593e-07, "logits/chosen": -3.063105583190918, "logits/rejected": -2.6062874794006348, "logps/chosen": -168.02154541015625, "logps/rejected": -242.42494201660156, "loss": 0.246, "rewards/accuracies": 0.875, "rewards/chosen": -0.255886048078537, "rewards/margins": 1.5238447189331055, "rewards/rejected": -1.7797307968139648, "step": 3000 }, { "epoch": 0.35, "eval_logits/chosen": -2.8445253372192383, "eval_logits/rejected": -2.806975841522217, "eval_logps/chosen": -293.6763610839844, "eval_logps/rejected": -235.7855682373047, "eval_loss": 0.44277361035346985, "eval_rewards/accuracies": 0.7714285850524902, "eval_rewards/chosen": 0.037860333919525146, "eval_rewards/margins": 1.1821558475494385, "eval_rewards/rejected": -1.144295334815979, "eval_runtime": 32.7716, "eval_samples_per_second": 2.136, "eval_steps_per_second": 1.068, "step": 3000 }, { "epoch": 0.35, "learning_rate": 1.9921573217839166e-07, "logits/chosen": -2.8027710914611816, "logits/rejected": -2.887425422668457, "logps/chosen": -513.3043823242188, "logps/rejected": -385.4537353515625, "loss": 0.4562, "rewards/accuracies": 0.5, "rewards/chosen": 0.41076868772506714, "rewards/margins": 1.4678168296813965, "rewards/rejected": -1.0570480823516846, "step": 3001 }, { "epoch": 0.35, "learning_rate": 1.9918061570876741e-07, "logits/chosen": -3.318390130996704, "logits/rejected": -3.3696188926696777, "logps/chosen": -111.37016296386719, "logps/rejected": -224.27706909179688, "loss": 0.3032, "rewards/accuracies": 0.875, "rewards/chosen": 0.2539695203304291, "rewards/margins": 1.6461830139160156, "rewards/rejected": -1.3922135829925537, "step": 3002 }, { "epoch": 0.35, "learning_rate": 1.9914549923914314e-07, "logits/chosen": -3.136902332305908, "logits/rejected": -3.3772804737091064, "logps/chosen": -337.8256530761719, "logps/rejected": -275.40423583984375, "loss": 0.6767, "rewards/accuracies": 0.5, "rewards/chosen": -0.6138310432434082, "rewards/margins": 1.276790976524353, "rewards/rejected": -1.8906221389770508, "step": 3003 }, { "epoch": 0.35, "learning_rate": 1.991103827695189e-07, "logits/chosen": -3.433337926864624, "logits/rejected": -3.3356785774230957, "logps/chosen": -181.3197021484375, "logps/rejected": -209.333740234375, "loss": 0.2586, "rewards/accuracies": 0.875, "rewards/chosen": -0.13835421204566956, "rewards/margins": 2.54742431640625, "rewards/rejected": -2.6857783794403076, "step": 3004 }, { "epoch": 0.35, "learning_rate": 1.9907526629989465e-07, "logits/chosen": -2.821763515472412, "logits/rejected": -2.961116313934326, "logps/chosen": -174.74969482421875, "logps/rejected": -251.42413330078125, "loss": 0.4287, "rewards/accuracies": 0.5, "rewards/chosen": -0.02719070017337799, "rewards/margins": 2.9332656860351562, "rewards/rejected": -2.960456371307373, "step": 3005 }, { "epoch": 0.35, "learning_rate": 1.9904014983027038e-07, "logits/chosen": -3.7379512786865234, "logits/rejected": -3.584634780883789, "logps/chosen": -405.835205078125, "logps/rejected": -380.30316162109375, "loss": 0.4206, "rewards/accuracies": 0.75, "rewards/chosen": -0.20924147963523865, "rewards/margins": 1.1872947216033936, "rewards/rejected": -1.3965362310409546, "step": 3006 }, { "epoch": 0.35, "learning_rate": 1.9900503336064613e-07, "logits/chosen": -3.562121868133545, "logits/rejected": -3.8115479946136475, "logps/chosen": -166.93214416503906, "logps/rejected": -213.4400634765625, "loss": 0.4442, "rewards/accuracies": 0.875, "rewards/chosen": 0.08170342445373535, "rewards/margins": 1.6551244258880615, "rewards/rejected": -1.5734210014343262, "step": 3007 }, { "epoch": 0.35, "learning_rate": 1.9896991689102188e-07, "logits/chosen": -2.8363289833068848, "logits/rejected": -2.9709248542785645, "logps/chosen": -372.1715087890625, "logps/rejected": -255.58438110351562, "loss": 0.6478, "rewards/accuracies": 0.75, "rewards/chosen": -0.6143717765808105, "rewards/margins": 1.224979043006897, "rewards/rejected": -1.839350938796997, "step": 3008 }, { "epoch": 0.35, "learning_rate": 1.989348004213976e-07, "logits/chosen": -3.510218858718872, "logits/rejected": -3.4929113388061523, "logps/chosen": -224.03868103027344, "logps/rejected": -275.6429748535156, "loss": 0.3077, "rewards/accuracies": 0.875, "rewards/chosen": -0.294406533241272, "rewards/margins": 1.5911123752593994, "rewards/rejected": -1.8855189085006714, "step": 3009 }, { "epoch": 0.35, "learning_rate": 1.988996839517734e-07, "logits/chosen": -2.471229076385498, "logits/rejected": -2.4347286224365234, "logps/chosen": -398.85101318359375, "logps/rejected": -286.23248291015625, "loss": 0.359, "rewards/accuracies": 0.75, "rewards/chosen": 0.36383056640625, "rewards/margins": 2.1161303520202637, "rewards/rejected": -1.7522996664047241, "step": 3010 }, { "epoch": 0.35, "learning_rate": 1.988645674821491e-07, "logits/chosen": -2.3877522945404053, "logits/rejected": -2.4750397205352783, "logps/chosen": -327.6005859375, "logps/rejected": -271.2843017578125, "loss": 0.7386, "rewards/accuracies": 0.5, "rewards/chosen": -0.6632050275802612, "rewards/margins": 0.38292232155799866, "rewards/rejected": -1.046127438545227, "step": 3011 }, { "epoch": 0.35, "learning_rate": 1.9882945101252487e-07, "logits/chosen": -2.903801918029785, "logits/rejected": -2.8651628494262695, "logps/chosen": -516.5899658203125, "logps/rejected": -389.511962890625, "loss": 0.274, "rewards/accuracies": 0.875, "rewards/chosen": -0.6114081144332886, "rewards/margins": 2.1922669410705566, "rewards/rejected": -2.8036751747131348, "step": 3012 }, { "epoch": 0.35, "learning_rate": 1.9879433454290063e-07, "logits/chosen": -2.432929515838623, "logits/rejected": -2.4705395698547363, "logps/chosen": -474.18035888671875, "logps/rejected": -308.600341796875, "loss": 0.3269, "rewards/accuracies": 0.875, "rewards/chosen": 0.21352744102478027, "rewards/margins": 1.6352531909942627, "rewards/rejected": -1.421725869178772, "step": 3013 }, { "epoch": 0.35, "learning_rate": 1.9875921807327635e-07, "logits/chosen": -3.472501754760742, "logits/rejected": -3.217576503753662, "logps/chosen": -340.12335205078125, "logps/rejected": -302.1429748535156, "loss": 0.3035, "rewards/accuracies": 0.875, "rewards/chosen": 0.1616649627685547, "rewards/margins": 2.697509765625, "rewards/rejected": -2.5358448028564453, "step": 3014 }, { "epoch": 0.35, "learning_rate": 1.987241016036521e-07, "logits/chosen": -2.5437166690826416, "logits/rejected": -2.7228076457977295, "logps/chosen": -285.1317443847656, "logps/rejected": -120.3369140625, "loss": 0.4389, "rewards/accuracies": 0.625, "rewards/chosen": 0.005285590887069702, "rewards/margins": 0.9945621490478516, "rewards/rejected": -0.9892765879631042, "step": 3015 }, { "epoch": 0.35, "learning_rate": 1.9868898513402786e-07, "logits/chosen": -3.5898420810699463, "logits/rejected": -3.2219769954681396, "logps/chosen": -334.88385009765625, "logps/rejected": -252.46917724609375, "loss": 0.2284, "rewards/accuracies": 0.875, "rewards/chosen": 0.484081506729126, "rewards/margins": 2.456275463104248, "rewards/rejected": -1.972193956375122, "step": 3016 }, { "epoch": 0.35, "learning_rate": 1.986538686644036e-07, "logits/chosen": -3.281723976135254, "logits/rejected": -2.991319417953491, "logps/chosen": -205.6256103515625, "logps/rejected": -149.90499877929688, "loss": 0.501, "rewards/accuracies": 0.875, "rewards/chosen": -0.18950071930885315, "rewards/margins": 1.5170499086380005, "rewards/rejected": -1.7065507173538208, "step": 3017 }, { "epoch": 0.35, "learning_rate": 1.9861875219477934e-07, "logits/chosen": -2.635406970977783, "logits/rejected": -3.1753180027008057, "logps/chosen": -284.2916564941406, "logps/rejected": -303.6649169921875, "loss": 0.2292, "rewards/accuracies": 1.0, "rewards/chosen": 0.13679316639900208, "rewards/margins": 2.396989107131958, "rewards/rejected": -2.260195732116699, "step": 3018 }, { "epoch": 0.35, "learning_rate": 1.9858363572515507e-07, "logits/chosen": -2.655086040496826, "logits/rejected": -2.872951030731201, "logps/chosen": -215.7069854736328, "logps/rejected": -294.8112487792969, "loss": 0.7569, "rewards/accuracies": 0.625, "rewards/chosen": -0.6652940511703491, "rewards/margins": 0.6008734703063965, "rewards/rejected": -1.2661676406860352, "step": 3019 }, { "epoch": 0.35, "learning_rate": 1.9854851925553082e-07, "logits/chosen": -3.1013875007629395, "logits/rejected": -3.3349311351776123, "logps/chosen": -117.22346496582031, "logps/rejected": -258.43963623046875, "loss": 0.3789, "rewards/accuracies": 0.625, "rewards/chosen": -0.4288647770881653, "rewards/margins": 2.8140647411346436, "rewards/rejected": -3.242929697036743, "step": 3020 }, { "epoch": 0.35, "learning_rate": 1.985134027859066e-07, "logits/chosen": -2.500993013381958, "logits/rejected": -3.0212295055389404, "logps/chosen": -202.703857421875, "logps/rejected": -208.21218872070312, "loss": 0.2429, "rewards/accuracies": 0.875, "rewards/chosen": 0.2056315839290619, "rewards/margins": 2.046143054962158, "rewards/rejected": -1.8405113220214844, "step": 3021 }, { "epoch": 0.35, "learning_rate": 1.984782863162823e-07, "logits/chosen": -3.098226547241211, "logits/rejected": -3.1351306438446045, "logps/chosen": -339.45343017578125, "logps/rejected": -303.92236328125, "loss": 1.8897, "rewards/accuracies": 0.25, "rewards/chosen": -1.9420448541641235, "rewards/margins": -0.21989381313323975, "rewards/rejected": -1.7221510410308838, "step": 3022 }, { "epoch": 0.35, "learning_rate": 1.9844316984665809e-07, "logits/chosen": -2.996962547302246, "logits/rejected": -2.885087490081787, "logps/chosen": -418.7176818847656, "logps/rejected": -286.46240234375, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": 0.5985704660415649, "rewards/margins": 2.354692220687866, "rewards/rejected": -1.7561216354370117, "step": 3023 }, { "epoch": 0.35, "learning_rate": 1.9840805337703384e-07, "logits/chosen": -2.605712413787842, "logits/rejected": -2.691016674041748, "logps/chosen": -283.2939453125, "logps/rejected": -332.7919921875, "loss": 0.8818, "rewards/accuracies": 0.375, "rewards/chosen": -0.4866686463356018, "rewards/margins": -0.034748926758766174, "rewards/rejected": -0.451919823884964, "step": 3024 }, { "epoch": 0.35, "learning_rate": 1.9837293690740957e-07, "logits/chosen": -3.6237730979919434, "logits/rejected": -3.3434765338897705, "logps/chosen": -260.3717956542969, "logps/rejected": -224.75294494628906, "loss": 0.3323, "rewards/accuracies": 0.875, "rewards/chosen": 0.1063823401927948, "rewards/margins": 2.3508050441741943, "rewards/rejected": -2.244422435760498, "step": 3025 }, { "epoch": 0.35, "learning_rate": 1.9833782043778532e-07, "logits/chosen": -3.303894281387329, "logits/rejected": -3.2161800861358643, "logps/chosen": -146.5291748046875, "logps/rejected": -194.72665405273438, "loss": 0.417, "rewards/accuracies": 0.875, "rewards/chosen": -0.16204451024532318, "rewards/margins": 1.404057264328003, "rewards/rejected": -1.5661019086837769, "step": 3026 }, { "epoch": 0.35, "learning_rate": 1.9830270396816105e-07, "logits/chosen": -2.1165835857391357, "logits/rejected": -2.270214080810547, "logps/chosen": -280.33050537109375, "logps/rejected": -210.07708740234375, "loss": 0.6495, "rewards/accuracies": 0.375, "rewards/chosen": -0.4028017520904541, "rewards/margins": 0.8968319892883301, "rewards/rejected": -1.2996337413787842, "step": 3027 }, { "epoch": 0.35, "learning_rate": 1.982675874985368e-07, "logits/chosen": -3.563145160675049, "logits/rejected": -3.32793927192688, "logps/chosen": -201.4566192626953, "logps/rejected": -114.2402114868164, "loss": 0.4588, "rewards/accuracies": 0.75, "rewards/chosen": -0.20357412099838257, "rewards/margins": 1.0731282234191895, "rewards/rejected": -1.2767024040222168, "step": 3028 }, { "epoch": 0.35, "learning_rate": 1.9823247102891256e-07, "logits/chosen": -3.150799512863159, "logits/rejected": -2.8476860523223877, "logps/chosen": -292.8707580566406, "logps/rejected": -285.7276306152344, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/chosen": 0.8865633010864258, "rewards/margins": 2.21144962310791, "rewards/rejected": -1.3248863220214844, "step": 3029 }, { "epoch": 0.35, "learning_rate": 1.9819735455928828e-07, "logits/chosen": -2.52286434173584, "logits/rejected": -2.74586820602417, "logps/chosen": -362.01422119140625, "logps/rejected": -241.31552124023438, "loss": 0.337, "rewards/accuracies": 0.875, "rewards/chosen": 0.07482895255088806, "rewards/margins": 1.8971986770629883, "rewards/rejected": -1.8223698139190674, "step": 3030 }, { "epoch": 0.35, "learning_rate": 1.9816223808966404e-07, "logits/chosen": -3.05952787399292, "logits/rejected": -3.217639684677124, "logps/chosen": -291.5096740722656, "logps/rejected": -207.1548309326172, "loss": 0.4365, "rewards/accuracies": 0.75, "rewards/chosen": -0.04779055714607239, "rewards/margins": 0.8655898571014404, "rewards/rejected": -0.9133803248405457, "step": 3031 }, { "epoch": 0.35, "learning_rate": 1.9812712162003982e-07, "logits/chosen": -3.334317684173584, "logits/rejected": -3.2033066749572754, "logps/chosen": -354.0831298828125, "logps/rejected": -286.35302734375, "loss": 0.321, "rewards/accuracies": 1.0, "rewards/chosen": 0.36995968222618103, "rewards/margins": 1.5233728885650635, "rewards/rejected": -1.15341317653656, "step": 3032 }, { "epoch": 0.35, "learning_rate": 1.9809200515041552e-07, "logits/chosen": -3.110982894897461, "logits/rejected": -2.9442169666290283, "logps/chosen": -382.1273193359375, "logps/rejected": -326.48187255859375, "loss": 0.4691, "rewards/accuracies": 0.75, "rewards/chosen": -0.1010933518409729, "rewards/margins": 0.8019925951957703, "rewards/rejected": -0.9030859470367432, "step": 3033 }, { "epoch": 0.35, "learning_rate": 1.980568886807913e-07, "logits/chosen": -3.4457716941833496, "logits/rejected": -3.2462565898895264, "logps/chosen": -322.18768310546875, "logps/rejected": -311.78790283203125, "loss": 0.4558, "rewards/accuracies": 0.75, "rewards/chosen": -0.6141657829284668, "rewards/margins": 0.9861765503883362, "rewards/rejected": -1.6003423929214478, "step": 3034 }, { "epoch": 0.35, "learning_rate": 1.9802177221116703e-07, "logits/chosen": -3.6390295028686523, "logits/rejected": -3.5001792907714844, "logps/chosen": -237.57666015625, "logps/rejected": -231.55169677734375, "loss": 0.2649, "rewards/accuracies": 1.0, "rewards/chosen": -0.18183813989162445, "rewards/margins": 2.0608084201812744, "rewards/rejected": -2.2426466941833496, "step": 3035 }, { "epoch": 0.35, "learning_rate": 1.9798665574154278e-07, "logits/chosen": -2.9726481437683105, "logits/rejected": -2.9976768493652344, "logps/chosen": -310.0140380859375, "logps/rejected": -191.63180541992188, "loss": 0.2261, "rewards/accuracies": 1.0, "rewards/chosen": 0.19015823304653168, "rewards/margins": 1.5200804471969604, "rewards/rejected": -1.3299221992492676, "step": 3036 }, { "epoch": 0.35, "learning_rate": 1.9795153927191853e-07, "logits/chosen": -2.87605357170105, "logits/rejected": -2.79805326461792, "logps/chosen": -199.61354064941406, "logps/rejected": -315.37994384765625, "loss": 0.2231, "rewards/accuracies": 0.875, "rewards/chosen": 0.0459071546792984, "rewards/margins": 2.6719188690185547, "rewards/rejected": -2.626011848449707, "step": 3037 }, { "epoch": 0.35, "learning_rate": 1.9791642280229426e-07, "logits/chosen": -3.4407434463500977, "logits/rejected": -3.536858320236206, "logps/chosen": -188.2257843017578, "logps/rejected": -272.03668212890625, "loss": 0.269, "rewards/accuracies": 1.0, "rewards/chosen": -0.19547808170318604, "rewards/margins": 1.6404101848602295, "rewards/rejected": -1.835888147354126, "step": 3038 }, { "epoch": 0.35, "learning_rate": 1.9788130633267001e-07, "logits/chosen": -3.929490089416504, "logits/rejected": -4.022392749786377, "logps/chosen": -227.07415771484375, "logps/rejected": -263.71087646484375, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": -0.20810705423355103, "rewards/margins": 0.9839985966682434, "rewards/rejected": -1.1921056509017944, "step": 3039 }, { "epoch": 0.35, "learning_rate": 1.9784618986304577e-07, "logits/chosen": -2.611607313156128, "logits/rejected": -2.630568504333496, "logps/chosen": -310.46295166015625, "logps/rejected": -265.02227783203125, "loss": 0.2031, "rewards/accuracies": 1.0, "rewards/chosen": -0.14055156707763672, "rewards/margins": 2.5918312072753906, "rewards/rejected": -2.7323827743530273, "step": 3040 }, { "epoch": 0.35, "learning_rate": 1.978110733934215e-07, "logits/chosen": -3.4036474227905273, "logits/rejected": -3.2815630435943604, "logps/chosen": -249.08258056640625, "logps/rejected": -235.0412139892578, "loss": 0.2034, "rewards/accuracies": 0.875, "rewards/chosen": 0.0114627406001091, "rewards/margins": 2.777311325073242, "rewards/rejected": -2.7658488750457764, "step": 3041 }, { "epoch": 0.35, "learning_rate": 1.9777595692379725e-07, "logits/chosen": -2.9080467224121094, "logits/rejected": -2.978524684906006, "logps/chosen": -167.72547912597656, "logps/rejected": -242.87387084960938, "loss": 0.6104, "rewards/accuracies": 0.625, "rewards/chosen": 0.17639777064323425, "rewards/margins": 1.325622797012329, "rewards/rejected": -1.149225115776062, "step": 3042 }, { "epoch": 0.35, "learning_rate": 1.9774084045417298e-07, "logits/chosen": -3.295597553253174, "logits/rejected": -3.4926910400390625, "logps/chosen": -207.98135375976562, "logps/rejected": -234.22433471679688, "loss": 0.4896, "rewards/accuracies": 0.75, "rewards/chosen": 0.11429326236248016, "rewards/margins": 1.1010631322860718, "rewards/rejected": -0.9867699146270752, "step": 3043 }, { "epoch": 0.35, "learning_rate": 1.9770572398454876e-07, "logits/chosen": -3.2846922874450684, "logits/rejected": -3.254024028778076, "logps/chosen": -388.8541564941406, "logps/rejected": -210.4857940673828, "loss": 0.4166, "rewards/accuracies": 0.75, "rewards/chosen": 0.7120705246925354, "rewards/margins": 1.476340413093567, "rewards/rejected": -0.7642698287963867, "step": 3044 }, { "epoch": 0.35, "learning_rate": 1.976706075149245e-07, "logits/chosen": -2.8282506465911865, "logits/rejected": -2.755333423614502, "logps/chosen": -354.48809814453125, "logps/rejected": -272.2617492675781, "loss": 0.5649, "rewards/accuracies": 0.75, "rewards/chosen": -0.09444017708301544, "rewards/margins": 0.8484975099563599, "rewards/rejected": -0.9429377317428589, "step": 3045 }, { "epoch": 0.35, "learning_rate": 1.9763549104530024e-07, "logits/chosen": -2.671649932861328, "logits/rejected": -2.8197784423828125, "logps/chosen": -223.82540893554688, "logps/rejected": -479.8037414550781, "loss": 0.1874, "rewards/accuracies": 0.875, "rewards/chosen": 0.03378252685070038, "rewards/margins": 2.399121046066284, "rewards/rejected": -2.3653385639190674, "step": 3046 }, { "epoch": 0.35, "learning_rate": 1.97600374575676e-07, "logits/chosen": -3.122842788696289, "logits/rejected": -3.284548759460449, "logps/chosen": -191.75709533691406, "logps/rejected": -241.4578399658203, "loss": 0.2279, "rewards/accuracies": 1.0, "rewards/chosen": 0.2843462824821472, "rewards/margins": 1.9698700904846191, "rewards/rejected": -1.6855237483978271, "step": 3047 }, { "epoch": 0.35, "learning_rate": 1.9756525810605172e-07, "logits/chosen": -3.865755319595337, "logits/rejected": -3.6041152477264404, "logps/chosen": -244.45639038085938, "logps/rejected": -217.13330078125, "loss": 0.3442, "rewards/accuracies": 0.875, "rewards/chosen": 0.0404282808303833, "rewards/margins": 1.0206739902496338, "rewards/rejected": -0.9802457094192505, "step": 3048 }, { "epoch": 0.35, "learning_rate": 1.9753014163642747e-07, "logits/chosen": -3.6419577598571777, "logits/rejected": -3.9636166095733643, "logps/chosen": -151.43231201171875, "logps/rejected": -233.0259552001953, "loss": 0.2829, "rewards/accuracies": 0.875, "rewards/chosen": 0.17550142109394073, "rewards/margins": 3.123033285140991, "rewards/rejected": -2.9475317001342773, "step": 3049 }, { "epoch": 0.35, "learning_rate": 1.9749502516680323e-07, "logits/chosen": -3.2303709983825684, "logits/rejected": -3.3413503170013428, "logps/chosen": -422.3179016113281, "logps/rejected": -252.9170379638672, "loss": 0.2598, "rewards/accuracies": 0.875, "rewards/chosen": 0.20782317221164703, "rewards/margins": 3.006639003753662, "rewards/rejected": -2.798815965652466, "step": 3050 }, { "epoch": 0.35, "learning_rate": 1.9745990869717896e-07, "logits/chosen": -3.103703260421753, "logits/rejected": -3.0119826793670654, "logps/chosen": -215.5072021484375, "logps/rejected": -300.2403259277344, "loss": 0.522, "rewards/accuracies": 0.75, "rewards/chosen": -0.6085277795791626, "rewards/margins": 1.361037254333496, "rewards/rejected": -1.9695651531219482, "step": 3051 }, { "epoch": 0.35, "learning_rate": 1.974247922275547e-07, "logits/chosen": -2.995516538619995, "logits/rejected": -3.27770733833313, "logps/chosen": -190.18251037597656, "logps/rejected": -276.3632507324219, "loss": 0.5397, "rewards/accuracies": 0.5, "rewards/chosen": -0.15863284468650818, "rewards/margins": 0.639234185218811, "rewards/rejected": -0.797866940498352, "step": 3052 }, { "epoch": 0.35, "learning_rate": 1.9738967575793046e-07, "logits/chosen": -3.341181755065918, "logits/rejected": -3.3497276306152344, "logps/chosen": -424.89697265625, "logps/rejected": -303.04144287109375, "loss": 0.3799, "rewards/accuracies": 0.75, "rewards/chosen": -0.1870366781949997, "rewards/margins": 1.6448566913604736, "rewards/rejected": -1.8318934440612793, "step": 3053 }, { "epoch": 0.35, "learning_rate": 1.973545592883062e-07, "logits/chosen": -2.8119359016418457, "logits/rejected": -2.788180112838745, "logps/chosen": -208.33493041992188, "logps/rejected": -329.6491394042969, "loss": 0.3586, "rewards/accuracies": 0.75, "rewards/chosen": 0.3023321032524109, "rewards/margins": 2.8718509674072266, "rewards/rejected": -2.569518804550171, "step": 3054 }, { "epoch": 0.35, "learning_rate": 1.9731944281868197e-07, "logits/chosen": -2.9192004203796387, "logits/rejected": -2.935391902923584, "logps/chosen": -222.40679931640625, "logps/rejected": -261.94488525390625, "loss": 0.4417, "rewards/accuracies": 0.75, "rewards/chosen": 0.0614902637898922, "rewards/margins": 2.0149471759796143, "rewards/rejected": -1.953456997871399, "step": 3055 }, { "epoch": 0.35, "learning_rate": 1.9728432634905767e-07, "logits/chosen": -3.262298822402954, "logits/rejected": -3.2442245483398438, "logps/chosen": -304.89141845703125, "logps/rejected": -313.369873046875, "loss": 0.4883, "rewards/accuracies": 0.875, "rewards/chosen": -0.3718996047973633, "rewards/margins": 2.149317502975464, "rewards/rejected": -2.5212173461914062, "step": 3056 }, { "epoch": 0.35, "learning_rate": 1.9724920987943345e-07, "logits/chosen": -2.8789186477661133, "logits/rejected": -3.0166006088256836, "logps/chosen": -294.1700134277344, "logps/rejected": -367.06231689453125, "loss": 0.3106, "rewards/accuracies": 0.75, "rewards/chosen": 0.3389640152454376, "rewards/margins": 2.8315038681030273, "rewards/rejected": -2.492539644241333, "step": 3057 }, { "epoch": 0.35, "learning_rate": 1.972140934098092e-07, "logits/chosen": -2.9572980403900146, "logits/rejected": -2.84912109375, "logps/chosen": -319.00738525390625, "logps/rejected": -287.9696960449219, "loss": 0.1747, "rewards/accuracies": 1.0, "rewards/chosen": 0.14011840522289276, "rewards/margins": 2.1210193634033203, "rewards/rejected": -1.9809010028839111, "step": 3058 }, { "epoch": 0.35, "learning_rate": 1.9717897694018493e-07, "logits/chosen": -2.8451247215270996, "logits/rejected": -2.8435986042022705, "logps/chosen": -296.18280029296875, "logps/rejected": -241.6285400390625, "loss": 0.5452, "rewards/accuracies": 0.5, "rewards/chosen": -0.42781952023506165, "rewards/margins": 1.4644172191619873, "rewards/rejected": -1.8922367095947266, "step": 3059 }, { "epoch": 0.35, "learning_rate": 1.971438604705607e-07, "logits/chosen": -1.9019265174865723, "logits/rejected": -2.157865047454834, "logps/chosen": -415.7928771972656, "logps/rejected": -224.9405059814453, "loss": 0.4648, "rewards/accuracies": 0.75, "rewards/chosen": 0.08197470754384995, "rewards/margins": 0.9937095046043396, "rewards/rejected": -0.911734938621521, "step": 3060 }, { "epoch": 0.35, "learning_rate": 1.9710874400093644e-07, "logits/chosen": -3.0277099609375, "logits/rejected": -3.2907814979553223, "logps/chosen": -310.419677734375, "logps/rejected": -261.60980224609375, "loss": 0.1682, "rewards/accuracies": 1.0, "rewards/chosen": 0.3358491063117981, "rewards/margins": 2.2276926040649414, "rewards/rejected": -1.8918434381484985, "step": 3061 }, { "epoch": 0.35, "learning_rate": 1.9707362753131217e-07, "logits/chosen": -2.665870189666748, "logits/rejected": -2.9369471073150635, "logps/chosen": -289.2737731933594, "logps/rejected": -165.29689025878906, "loss": 0.543, "rewards/accuracies": 0.75, "rewards/chosen": 0.21771953999996185, "rewards/margins": 1.5365526676177979, "rewards/rejected": -1.3188331127166748, "step": 3062 }, { "epoch": 0.35, "learning_rate": 1.9703851106168792e-07, "logits/chosen": -2.779461145401001, "logits/rejected": -2.6709160804748535, "logps/chosen": -360.2775573730469, "logps/rejected": -201.90740966796875, "loss": 0.4382, "rewards/accuracies": 0.75, "rewards/chosen": 0.11120958626270294, "rewards/margins": 1.4007362127304077, "rewards/rejected": -1.2895267009735107, "step": 3063 }, { "epoch": 0.35, "learning_rate": 1.9700339459206365e-07, "logits/chosen": -3.459818124771118, "logits/rejected": -3.352565288543701, "logps/chosen": -228.70184326171875, "logps/rejected": -218.30648803710938, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -0.04656504467129707, "rewards/margins": 1.7341384887695312, "rewards/rejected": -1.7807034254074097, "step": 3064 }, { "epoch": 0.35, "learning_rate": 1.969682781224394e-07, "logits/chosen": -3.332549571990967, "logits/rejected": -3.393594741821289, "logps/chosen": -393.3575439453125, "logps/rejected": -221.424072265625, "loss": 0.5811, "rewards/accuracies": 0.875, "rewards/chosen": -0.24022534489631653, "rewards/margins": 0.8009299039840698, "rewards/rejected": -1.0411553382873535, "step": 3065 }, { "epoch": 0.35, "learning_rate": 1.9693316165281518e-07, "logits/chosen": -3.2575490474700928, "logits/rejected": -3.3155391216278076, "logps/chosen": -298.7373046875, "logps/rejected": -280.0418395996094, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": 0.5184282064437866, "rewards/margins": 2.8983774185180664, "rewards/rejected": -2.3799490928649902, "step": 3066 }, { "epoch": 0.35, "learning_rate": 1.9689804518319088e-07, "logits/chosen": -3.0950751304626465, "logits/rejected": -3.161099433898926, "logps/chosen": -260.45660400390625, "logps/rejected": -213.88665771484375, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.04338574409484863, "rewards/margins": 2.7250099182128906, "rewards/rejected": -2.7683956623077393, "step": 3067 }, { "epoch": 0.35, "learning_rate": 1.9686292871356666e-07, "logits/chosen": -3.430783271789551, "logits/rejected": -3.4712109565734863, "logps/chosen": -225.34194946289062, "logps/rejected": -227.66732788085938, "loss": 0.7103, "rewards/accuracies": 0.75, "rewards/chosen": -0.658412754535675, "rewards/margins": 1.080237627029419, "rewards/rejected": -1.7386503219604492, "step": 3068 }, { "epoch": 0.35, "learning_rate": 1.9682781224394242e-07, "logits/chosen": -2.6669554710388184, "logits/rejected": -2.6647467613220215, "logps/chosen": -208.25653076171875, "logps/rejected": -458.4106140136719, "loss": 0.4452, "rewards/accuracies": 0.75, "rewards/chosen": 0.04271111264824867, "rewards/margins": 1.5185856819152832, "rewards/rejected": -1.475874662399292, "step": 3069 }, { "epoch": 0.35, "learning_rate": 1.9679269577431815e-07, "logits/chosen": -2.8078761100769043, "logits/rejected": -2.862076997756958, "logps/chosen": -198.1163787841797, "logps/rejected": -218.91244506835938, "loss": 0.3516, "rewards/accuracies": 0.75, "rewards/chosen": 0.37441080808639526, "rewards/margins": 2.130362033843994, "rewards/rejected": -1.7559514045715332, "step": 3070 }, { "epoch": 0.35, "learning_rate": 1.967575793046939e-07, "logits/chosen": -3.2165818214416504, "logits/rejected": -3.3916678428649902, "logps/chosen": -340.52783203125, "logps/rejected": -224.23562622070312, "loss": 0.5458, "rewards/accuracies": 0.625, "rewards/chosen": -0.4872143268585205, "rewards/margins": 1.549790620803833, "rewards/rejected": -2.0370049476623535, "step": 3071 }, { "epoch": 0.35, "learning_rate": 1.9672246283506963e-07, "logits/chosen": -2.727433443069458, "logits/rejected": -2.8505280017852783, "logps/chosen": -320.86407470703125, "logps/rejected": -320.6701354980469, "loss": 0.3794, "rewards/accuracies": 0.75, "rewards/chosen": 0.3218567371368408, "rewards/margins": 1.528525471687317, "rewards/rejected": -1.2066688537597656, "step": 3072 }, { "epoch": 0.35, "learning_rate": 1.9668734636544538e-07, "logits/chosen": -2.8702425956726074, "logits/rejected": -2.890501022338867, "logps/chosen": -315.35821533203125, "logps/rejected": -260.9658203125, "loss": 0.3229, "rewards/accuracies": 0.875, "rewards/chosen": -0.6542035341262817, "rewards/margins": 1.5954463481903076, "rewards/rejected": -2.2496497631073, "step": 3073 }, { "epoch": 0.35, "learning_rate": 1.9665222989582113e-07, "logits/chosen": -3.4884254932403564, "logits/rejected": -3.5635218620300293, "logps/chosen": -320.4202575683594, "logps/rejected": -212.19674682617188, "loss": 0.2118, "rewards/accuracies": 0.875, "rewards/chosen": 0.5530683994293213, "rewards/margins": 2.5857884883880615, "rewards/rejected": -2.0327200889587402, "step": 3074 }, { "epoch": 0.35, "learning_rate": 1.9661711342619686e-07, "logits/chosen": -3.0132694244384766, "logits/rejected": -2.898167133331299, "logps/chosen": -304.4390869140625, "logps/rejected": -215.00189208984375, "loss": 0.3254, "rewards/accuracies": 0.875, "rewards/chosen": -0.10484334081411362, "rewards/margins": 1.3833073377609253, "rewards/rejected": -1.488150715827942, "step": 3075 }, { "epoch": 0.35, "learning_rate": 1.9658199695657262e-07, "logits/chosen": -3.6952781677246094, "logits/rejected": -4.015458106994629, "logps/chosen": -182.2537078857422, "logps/rejected": -251.23565673828125, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": -0.5762869119644165, "rewards/margins": 0.47806811332702637, "rewards/rejected": -1.0543550252914429, "step": 3076 }, { "epoch": 0.35, "learning_rate": 1.965468804869484e-07, "logits/chosen": -3.045768976211548, "logits/rejected": -3.1276774406433105, "logps/chosen": -230.03053283691406, "logps/rejected": -202.70664978027344, "loss": 0.5215, "rewards/accuracies": 0.625, "rewards/chosen": -0.4107796847820282, "rewards/margins": 0.9814106225967407, "rewards/rejected": -1.3921902179718018, "step": 3077 }, { "epoch": 0.35, "learning_rate": 1.9651176401732412e-07, "logits/chosen": -2.740459442138672, "logits/rejected": -2.6614506244659424, "logps/chosen": -375.5497741699219, "logps/rejected": -314.50128173828125, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": 0.6683952808380127, "rewards/margins": 2.7857348918914795, "rewards/rejected": -2.1173393726348877, "step": 3078 }, { "epoch": 0.35, "learning_rate": 1.9647664754769988e-07, "logits/chosen": -2.8291549682617188, "logits/rejected": -3.107804298400879, "logps/chosen": -174.18634033203125, "logps/rejected": -193.68040466308594, "loss": 0.3792, "rewards/accuracies": 0.875, "rewards/chosen": 0.32329416275024414, "rewards/margins": 1.0610005855560303, "rewards/rejected": -0.7377064824104309, "step": 3079 }, { "epoch": 0.36, "learning_rate": 1.964415310780756e-07, "logits/chosen": -3.1709063053131104, "logits/rejected": -3.124007225036621, "logps/chosen": -185.44290161132812, "logps/rejected": -224.39195251464844, "loss": 0.2682, "rewards/accuracies": 1.0, "rewards/chosen": 0.08008065819740295, "rewards/margins": 1.6290040016174316, "rewards/rejected": -1.548923373222351, "step": 3080 }, { "epoch": 0.36, "learning_rate": 1.9640641460845136e-07, "logits/chosen": -2.4712629318237305, "logits/rejected": -2.3469796180725098, "logps/chosen": -94.00013732910156, "logps/rejected": -164.3089141845703, "loss": 0.4605, "rewards/accuracies": 0.75, "rewards/chosen": -0.3907009959220886, "rewards/margins": 0.9797440767288208, "rewards/rejected": -1.3704450130462646, "step": 3081 }, { "epoch": 0.36, "learning_rate": 1.963712981388271e-07, "logits/chosen": -2.5381901264190674, "logits/rejected": -2.5858469009399414, "logps/chosen": -279.7504577636719, "logps/rejected": -310.41522216796875, "loss": 0.3587, "rewards/accuracies": 0.875, "rewards/chosen": 0.012198060750961304, "rewards/margins": 2.116105556488037, "rewards/rejected": -2.103907585144043, "step": 3082 }, { "epoch": 0.36, "learning_rate": 1.9633618166920284e-07, "logits/chosen": -3.471062660217285, "logits/rejected": -3.4846184253692627, "logps/chosen": -203.15170288085938, "logps/rejected": -177.05755615234375, "loss": 0.5395, "rewards/accuracies": 0.75, "rewards/chosen": -0.5098321437835693, "rewards/margins": 1.0100224018096924, "rewards/rejected": -1.5198546648025513, "step": 3083 }, { "epoch": 0.36, "learning_rate": 1.963010651995786e-07, "logits/chosen": -2.7988507747650146, "logits/rejected": -2.8256518840789795, "logps/chosen": -287.4083251953125, "logps/rejected": -241.5149383544922, "loss": 0.5701, "rewards/accuracies": 0.5, "rewards/chosen": 0.5783542394638062, "rewards/margins": 1.3916542530059814, "rewards/rejected": -0.8133001327514648, "step": 3084 }, { "epoch": 0.36, "learning_rate": 1.9626594872995435e-07, "logits/chosen": -2.4115376472473145, "logits/rejected": -2.4969100952148438, "logps/chosen": -327.6260986328125, "logps/rejected": -332.6960754394531, "loss": 0.4141, "rewards/accuracies": 0.875, "rewards/chosen": -0.08624743670225143, "rewards/margins": 1.687882900238037, "rewards/rejected": -1.7741303443908691, "step": 3085 }, { "epoch": 0.36, "learning_rate": 1.9623083226033008e-07, "logits/chosen": -3.42287015914917, "logits/rejected": -3.5188708305358887, "logps/chosen": -163.1253204345703, "logps/rejected": -332.4408264160156, "loss": 0.63, "rewards/accuracies": 0.75, "rewards/chosen": -0.5863954424858093, "rewards/margins": 1.00204336643219, "rewards/rejected": -1.5884387493133545, "step": 3086 }, { "epoch": 0.36, "learning_rate": 1.9619571579070583e-07, "logits/chosen": -3.7319681644439697, "logits/rejected": -3.7785820960998535, "logps/chosen": -265.24932861328125, "logps/rejected": -245.22450256347656, "loss": 0.4261, "rewards/accuracies": 0.75, "rewards/chosen": -0.5445310473442078, "rewards/margins": 1.6153206825256348, "rewards/rejected": -2.1598517894744873, "step": 3087 }, { "epoch": 0.36, "learning_rate": 1.9616059932108156e-07, "logits/chosen": -2.8471860885620117, "logits/rejected": -3.107409954071045, "logps/chosen": -281.7643737792969, "logps/rejected": -209.75204467773438, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": 0.5447835326194763, "rewards/margins": 2.7791929244995117, "rewards/rejected": -2.2344093322753906, "step": 3088 }, { "epoch": 0.36, "learning_rate": 1.9612548285145734e-07, "logits/chosen": -3.0530147552490234, "logits/rejected": -2.825230598449707, "logps/chosen": -186.46145629882812, "logps/rejected": -155.78109741210938, "loss": 0.3455, "rewards/accuracies": 0.875, "rewards/chosen": -0.31543001532554626, "rewards/margins": 2.172807216644287, "rewards/rejected": -2.4882373809814453, "step": 3089 }, { "epoch": 0.36, "learning_rate": 1.960903663818331e-07, "logits/chosen": -2.751537561416626, "logits/rejected": -2.6698997020721436, "logps/chosen": -254.18115234375, "logps/rejected": -220.85328674316406, "loss": 0.3314, "rewards/accuracies": 0.875, "rewards/chosen": 0.24336227774620056, "rewards/margins": 1.1776258945465088, "rewards/rejected": -0.9342636466026306, "step": 3090 }, { "epoch": 0.36, "learning_rate": 1.9605524991220882e-07, "logits/chosen": -3.1359548568725586, "logits/rejected": -2.998751163482666, "logps/chosen": -290.08148193359375, "logps/rejected": -242.88125610351562, "loss": 0.3102, "rewards/accuracies": 0.875, "rewards/chosen": 0.3427727222442627, "rewards/margins": 1.8069825172424316, "rewards/rejected": -1.4642099142074585, "step": 3091 }, { "epoch": 0.36, "learning_rate": 1.9602013344258457e-07, "logits/chosen": -2.9210681915283203, "logits/rejected": -2.9084177017211914, "logps/chosen": -346.00054931640625, "logps/rejected": -326.629638671875, "loss": 0.4096, "rewards/accuracies": 0.875, "rewards/chosen": 0.24618424475193024, "rewards/margins": 2.0851447582244873, "rewards/rejected": -1.8389604091644287, "step": 3092 }, { "epoch": 0.36, "learning_rate": 1.959850169729603e-07, "logits/chosen": -2.930842161178589, "logits/rejected": -2.80279541015625, "logps/chosen": -449.075439453125, "logps/rejected": -228.6827850341797, "loss": 0.9606, "rewards/accuracies": 0.5, "rewards/chosen": -0.7300905585289001, "rewards/margins": 0.2501145601272583, "rewards/rejected": -0.9802049994468689, "step": 3093 }, { "epoch": 0.36, "learning_rate": 1.9594990050333605e-07, "logits/chosen": -2.839263439178467, "logits/rejected": -2.80460786819458, "logps/chosen": -324.3180847167969, "logps/rejected": -178.81686401367188, "loss": 0.445, "rewards/accuracies": 0.75, "rewards/chosen": 0.19470174610614777, "rewards/margins": 1.5245707035064697, "rewards/rejected": -1.329869031906128, "step": 3094 }, { "epoch": 0.36, "learning_rate": 1.959147840337118e-07, "logits/chosen": -3.0211663246154785, "logits/rejected": -3.2875208854675293, "logps/chosen": -258.13104248046875, "logps/rejected": -175.353515625, "loss": 0.5279, "rewards/accuracies": 0.625, "rewards/chosen": 0.3289003372192383, "rewards/margins": 0.7249034643173218, "rewards/rejected": -0.39600318670272827, "step": 3095 }, { "epoch": 0.36, "learning_rate": 1.9587966756408753e-07, "logits/chosen": -3.7746894359588623, "logits/rejected": -3.701540470123291, "logps/chosen": -87.59601593017578, "logps/rejected": -114.17941284179688, "loss": 0.4657, "rewards/accuracies": 0.75, "rewards/chosen": -0.27062538266181946, "rewards/margins": 1.0857970714569092, "rewards/rejected": -1.3564224243164062, "step": 3096 }, { "epoch": 0.36, "learning_rate": 1.958445510944633e-07, "logits/chosen": -2.119966983795166, "logits/rejected": -2.189887762069702, "logps/chosen": -334.360595703125, "logps/rejected": -364.8055725097656, "loss": 0.3091, "rewards/accuracies": 0.875, "rewards/chosen": 0.369712233543396, "rewards/margins": 1.367830514907837, "rewards/rejected": -0.9981181621551514, "step": 3097 }, { "epoch": 0.36, "learning_rate": 1.9580943462483904e-07, "logits/chosen": -2.4852919578552246, "logits/rejected": -2.3092923164367676, "logps/chosen": -354.326171875, "logps/rejected": -394.84991455078125, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": 0.19075548648834229, "rewards/margins": 3.3216190338134766, "rewards/rejected": -3.130863666534424, "step": 3098 }, { "epoch": 0.36, "learning_rate": 1.9577431815521477e-07, "logits/chosen": -2.86698579788208, "logits/rejected": -2.7887377738952637, "logps/chosen": -266.2457580566406, "logps/rejected": -174.58917236328125, "loss": 0.3057, "rewards/accuracies": 0.875, "rewards/chosen": -0.1278223991394043, "rewards/margins": 2.1043291091918945, "rewards/rejected": -2.232151508331299, "step": 3099 }, { "epoch": 0.36, "learning_rate": 1.9573920168559055e-07, "logits/chosen": -3.0276951789855957, "logits/rejected": -3.058997631072998, "logps/chosen": -157.2912139892578, "logps/rejected": -208.0201873779297, "loss": 0.4033, "rewards/accuracies": 0.75, "rewards/chosen": 0.1304360032081604, "rewards/margins": 1.4164899587631226, "rewards/rejected": -1.2860538959503174, "step": 3100 }, { "epoch": 0.36, "learning_rate": 1.9570408521596625e-07, "logits/chosen": -3.5753822326660156, "logits/rejected": -3.2642714977264404, "logps/chosen": -123.91609191894531, "logps/rejected": -142.77993774414062, "loss": 0.5112, "rewards/accuracies": 0.75, "rewards/chosen": -0.6242440342903137, "rewards/margins": 1.4839280843734741, "rewards/rejected": -2.1081719398498535, "step": 3101 }, { "epoch": 0.36, "learning_rate": 1.9566896874634203e-07, "logits/chosen": -3.2596631050109863, "logits/rejected": -3.2124130725860596, "logps/chosen": -278.4791259765625, "logps/rejected": -184.82432556152344, "loss": 0.34, "rewards/accuracies": 0.875, "rewards/chosen": -0.3263780474662781, "rewards/margins": 1.3903692960739136, "rewards/rejected": -1.7167472839355469, "step": 3102 }, { "epoch": 0.36, "learning_rate": 1.9563385227671778e-07, "logits/chosen": -3.1468048095703125, "logits/rejected": -2.9663121700286865, "logps/chosen": -227.12924194335938, "logps/rejected": -371.0093994140625, "loss": 0.3283, "rewards/accuracies": 0.875, "rewards/chosen": 0.17177477478981018, "rewards/margins": 1.9644711017608643, "rewards/rejected": -1.7926963567733765, "step": 3103 }, { "epoch": 0.36, "learning_rate": 1.955987358070935e-07, "logits/chosen": -2.4530160427093506, "logits/rejected": -2.5940890312194824, "logps/chosen": -243.99659729003906, "logps/rejected": -257.55047607421875, "loss": 0.4068, "rewards/accuracies": 0.75, "rewards/chosen": -0.08054850995540619, "rewards/margins": 1.1997195482254028, "rewards/rejected": -1.2802680730819702, "step": 3104 }, { "epoch": 0.36, "learning_rate": 1.9556361933746927e-07, "logits/chosen": -3.117814064025879, "logits/rejected": -3.1777310371398926, "logps/chosen": -170.281005859375, "logps/rejected": -217.00865173339844, "loss": 0.3274, "rewards/accuracies": 0.875, "rewards/chosen": 0.2771066427230835, "rewards/margins": 1.8005781173706055, "rewards/rejected": -1.523471474647522, "step": 3105 }, { "epoch": 0.36, "learning_rate": 1.9552850286784502e-07, "logits/chosen": -2.468282461166382, "logits/rejected": -2.6449267864227295, "logps/chosen": -231.47715759277344, "logps/rejected": -193.7936248779297, "loss": 0.4764, "rewards/accuracies": 0.75, "rewards/chosen": -0.026336491107940674, "rewards/margins": 1.1085357666015625, "rewards/rejected": -1.1348721981048584, "step": 3106 }, { "epoch": 0.36, "learning_rate": 1.9549338639822075e-07, "logits/chosen": -3.772731304168701, "logits/rejected": -3.5305869579315186, "logps/chosen": -189.17623901367188, "logps/rejected": -265.4544677734375, "loss": 0.5189, "rewards/accuracies": 0.625, "rewards/chosen": -0.6102039813995361, "rewards/margins": 0.9858118295669556, "rewards/rejected": -1.5960159301757812, "step": 3107 }, { "epoch": 0.36, "learning_rate": 1.954582699285965e-07, "logits/chosen": -2.8213677406311035, "logits/rejected": -2.987582206726074, "logps/chosen": -193.61802673339844, "logps/rejected": -239.55477905273438, "loss": 0.3603, "rewards/accuracies": 0.875, "rewards/chosen": -0.10257388651371002, "rewards/margins": 1.5944665670394897, "rewards/rejected": -1.6970404386520386, "step": 3108 }, { "epoch": 0.36, "learning_rate": 1.9542315345897223e-07, "logits/chosen": -3.5685129165649414, "logits/rejected": -3.624213933944702, "logps/chosen": -114.85082244873047, "logps/rejected": -187.01666259765625, "loss": 0.3148, "rewards/accuracies": 0.875, "rewards/chosen": 0.01748856157064438, "rewards/margins": 2.4503862857818604, "rewards/rejected": -2.4328978061676025, "step": 3109 }, { "epoch": 0.36, "learning_rate": 1.9538803698934798e-07, "logits/chosen": -2.980478286743164, "logits/rejected": -3.362539291381836, "logps/chosen": -158.89834594726562, "logps/rejected": -207.892578125, "loss": 0.2631, "rewards/accuracies": 0.75, "rewards/chosen": 0.3749452233314514, "rewards/margins": 3.1507649421691895, "rewards/rejected": -2.775819778442383, "step": 3110 }, { "epoch": 0.36, "learning_rate": 1.9535292051972376e-07, "logits/chosen": -2.8349218368530273, "logits/rejected": -2.8050320148468018, "logps/chosen": -397.3338623046875, "logps/rejected": -170.21737670898438, "loss": 0.72, "rewards/accuracies": 0.625, "rewards/chosen": -0.6490936279296875, "rewards/margins": 1.0060296058654785, "rewards/rejected": -1.655123233795166, "step": 3111 }, { "epoch": 0.36, "learning_rate": 1.953178040500995e-07, "logits/chosen": -2.6955342292785645, "logits/rejected": -2.5296480655670166, "logps/chosen": -200.2220001220703, "logps/rejected": -276.84991455078125, "loss": 0.6075, "rewards/accuracies": 0.875, "rewards/chosen": -0.2893408238887787, "rewards/margins": 0.6289680004119873, "rewards/rejected": -0.9183087944984436, "step": 3112 }, { "epoch": 0.36, "learning_rate": 1.9528268758047524e-07, "logits/chosen": -2.84305739402771, "logits/rejected": -2.9804935455322266, "logps/chosen": -269.8381652832031, "logps/rejected": -260.12884521484375, "loss": 0.5668, "rewards/accuracies": 0.625, "rewards/chosen": -0.3733377158641815, "rewards/margins": 1.8349182605743408, "rewards/rejected": -2.2082557678222656, "step": 3113 }, { "epoch": 0.36, "learning_rate": 1.95247571110851e-07, "logits/chosen": -3.008105754852295, "logits/rejected": -3.419436454772949, "logps/chosen": -168.17922973632812, "logps/rejected": -127.64070129394531, "loss": 0.4402, "rewards/accuracies": 0.875, "rewards/chosen": 0.20922866463661194, "rewards/margins": 1.341770052909851, "rewards/rejected": -1.1325414180755615, "step": 3114 }, { "epoch": 0.36, "learning_rate": 1.9521245464122673e-07, "logits/chosen": -2.782423973083496, "logits/rejected": -2.849886655807495, "logps/chosen": -264.4804382324219, "logps/rejected": -356.5912780761719, "loss": 0.3319, "rewards/accuracies": 0.875, "rewards/chosen": 0.4575228691101074, "rewards/margins": 1.7113043069839478, "rewards/rejected": -1.2537814378738403, "step": 3115 }, { "epoch": 0.36, "learning_rate": 1.9517733817160248e-07, "logits/chosen": -2.6934237480163574, "logits/rejected": -2.544127941131592, "logps/chosen": -239.30445861816406, "logps/rejected": -247.96731567382812, "loss": 0.3127, "rewards/accuracies": 0.875, "rewards/chosen": 0.08455897122621536, "rewards/margins": 1.6608185768127441, "rewards/rejected": -1.5762596130371094, "step": 3116 }, { "epoch": 0.36, "learning_rate": 1.951422217019782e-07, "logits/chosen": -3.177844524383545, "logits/rejected": -3.2488391399383545, "logps/chosen": -284.8919677734375, "logps/rejected": -206.64675903320312, "loss": 0.1555, "rewards/accuracies": 1.0, "rewards/chosen": 0.32208263874053955, "rewards/margins": 2.090003728866577, "rewards/rejected": -1.7679210901260376, "step": 3117 }, { "epoch": 0.36, "learning_rate": 1.9510710523235396e-07, "logits/chosen": -3.1049277782440186, "logits/rejected": -2.7169547080993652, "logps/chosen": -346.8677062988281, "logps/rejected": -336.5568542480469, "loss": 0.3661, "rewards/accuracies": 0.75, "rewards/chosen": 0.10120445489883423, "rewards/margins": 1.9424926042556763, "rewards/rejected": -1.8412880897521973, "step": 3118 }, { "epoch": 0.36, "learning_rate": 1.9507198876272971e-07, "logits/chosen": -3.155133008956909, "logits/rejected": -3.333463668823242, "logps/chosen": -246.06948852539062, "logps/rejected": -306.27447509765625, "loss": 0.2175, "rewards/accuracies": 1.0, "rewards/chosen": 0.427146852016449, "rewards/margins": 3.072503089904785, "rewards/rejected": -2.6453564167022705, "step": 3119 }, { "epoch": 0.36, "learning_rate": 1.9503687229310544e-07, "logits/chosen": -2.935206890106201, "logits/rejected": -2.903000593185425, "logps/chosen": -282.84906005859375, "logps/rejected": -199.02740478515625, "loss": 0.6734, "rewards/accuracies": 0.5, "rewards/chosen": -0.3533051013946533, "rewards/margins": 0.7861587405204773, "rewards/rejected": -1.1394637823104858, "step": 3120 }, { "epoch": 0.36, "learning_rate": 1.950017558234812e-07, "logits/chosen": -3.081047773361206, "logits/rejected": -3.1772549152374268, "logps/chosen": -341.4259948730469, "logps/rejected": -377.9747009277344, "loss": 0.1865, "rewards/accuracies": 1.0, "rewards/chosen": 0.6126015186309814, "rewards/margins": 2.7089593410491943, "rewards/rejected": -2.096357822418213, "step": 3121 }, { "epoch": 0.36, "learning_rate": 1.9496663935385698e-07, "logits/chosen": -3.3274025917053223, "logits/rejected": -3.181914806365967, "logps/chosen": -320.32696533203125, "logps/rejected": -213.698974609375, "loss": 0.3359, "rewards/accuracies": 0.75, "rewards/chosen": 0.6988868713378906, "rewards/margins": 2.239280939102173, "rewards/rejected": -1.5403938293457031, "step": 3122 }, { "epoch": 0.36, "learning_rate": 1.949315228842327e-07, "logits/chosen": -2.8600902557373047, "logits/rejected": -3.1306629180908203, "logps/chosen": -203.5533447265625, "logps/rejected": -228.5997772216797, "loss": 0.2809, "rewards/accuracies": 1.0, "rewards/chosen": -0.12238869816064835, "rewards/margins": 1.7218592166900635, "rewards/rejected": -1.8442476987838745, "step": 3123 }, { "epoch": 0.36, "learning_rate": 1.9489640641460846e-07, "logits/chosen": -1.9985876083374023, "logits/rejected": -2.338883876800537, "logps/chosen": -321.4471740722656, "logps/rejected": -234.20289611816406, "loss": 0.224, "rewards/accuracies": 1.0, "rewards/chosen": 0.09489896893501282, "rewards/margins": 1.5055862665176392, "rewards/rejected": -1.4106873273849487, "step": 3124 }, { "epoch": 0.36, "learning_rate": 1.9486128994498418e-07, "logits/chosen": -3.3073911666870117, "logits/rejected": -3.0754551887512207, "logps/chosen": -303.9153747558594, "logps/rejected": -255.05633544921875, "loss": 0.2527, "rewards/accuracies": 0.875, "rewards/chosen": -0.17883943021297455, "rewards/margins": 2.0495049953460693, "rewards/rejected": -2.228344440460205, "step": 3125 }, { "epoch": 0.36, "learning_rate": 1.9482617347535994e-07, "logits/chosen": -2.7084317207336426, "logits/rejected": -2.904076337814331, "logps/chosen": -286.6516418457031, "logps/rejected": -288.10247802734375, "loss": 0.541, "rewards/accuracies": 0.75, "rewards/chosen": -0.7445558309555054, "rewards/margins": 1.1072702407836914, "rewards/rejected": -1.8518260717391968, "step": 3126 }, { "epoch": 0.36, "learning_rate": 1.947910570057357e-07, "logits/chosen": -3.025954246520996, "logits/rejected": -3.003652572631836, "logps/chosen": -124.65135955810547, "logps/rejected": -272.06390380859375, "loss": 0.4778, "rewards/accuracies": 0.75, "rewards/chosen": -0.12165818363428116, "rewards/margins": 1.2299613952636719, "rewards/rejected": -1.3516194820404053, "step": 3127 }, { "epoch": 0.36, "learning_rate": 1.9475594053611142e-07, "logits/chosen": -3.5752532482147217, "logits/rejected": -3.469583034515381, "logps/chosen": -284.8741149902344, "logps/rejected": -279.1533203125, "loss": 0.7118, "rewards/accuracies": 0.75, "rewards/chosen": -0.0186142735183239, "rewards/margins": 0.5026312470436096, "rewards/rejected": -0.521245539188385, "step": 3128 }, { "epoch": 0.36, "learning_rate": 1.9472082406648717e-07, "logits/chosen": -1.9693684577941895, "logits/rejected": -1.7743173837661743, "logps/chosen": -362.24920654296875, "logps/rejected": -359.17877197265625, "loss": 0.5973, "rewards/accuracies": 0.5, "rewards/chosen": -0.6720864772796631, "rewards/margins": 0.8427870273590088, "rewards/rejected": -1.5148735046386719, "step": 3129 }, { "epoch": 0.36, "learning_rate": 1.9468570759686293e-07, "logits/chosen": -2.61252498626709, "logits/rejected": -2.764272451400757, "logps/chosen": -289.86468505859375, "logps/rejected": -241.3604278564453, "loss": 0.5395, "rewards/accuracies": 0.625, "rewards/chosen": -0.5666022300720215, "rewards/margins": 0.7167096734046936, "rewards/rejected": -1.2833119630813599, "step": 3130 }, { "epoch": 0.36, "learning_rate": 1.9465059112723865e-07, "logits/chosen": -3.1690239906311035, "logits/rejected": -3.3270647525787354, "logps/chosen": -177.7681121826172, "logps/rejected": -356.51983642578125, "loss": 0.2839, "rewards/accuracies": 0.875, "rewards/chosen": 0.19588251411914825, "rewards/margins": 2.389726400375366, "rewards/rejected": -2.1938438415527344, "step": 3131 }, { "epoch": 0.36, "learning_rate": 1.946154746576144e-07, "logits/chosen": -3.2140862941741943, "logits/rejected": -3.5098836421966553, "logps/chosen": -195.50234985351562, "logps/rejected": -327.5115661621094, "loss": 0.3091, "rewards/accuracies": 0.75, "rewards/chosen": -0.4320847690105438, "rewards/margins": 2.2779173851013184, "rewards/rejected": -2.7100021839141846, "step": 3132 }, { "epoch": 0.36, "learning_rate": 1.9458035818799014e-07, "logits/chosen": -3.3292362689971924, "logits/rejected": -3.2177438735961914, "logps/chosen": -160.10154724121094, "logps/rejected": -196.58212280273438, "loss": 1.3432, "rewards/accuracies": 0.5, "rewards/chosen": -1.791459560394287, "rewards/margins": -0.3153592348098755, "rewards/rejected": -1.4761004447937012, "step": 3133 }, { "epoch": 0.36, "learning_rate": 1.9454524171836592e-07, "logits/chosen": -2.882235050201416, "logits/rejected": -2.893958806991577, "logps/chosen": -258.2135009765625, "logps/rejected": -222.9864044189453, "loss": 0.6598, "rewards/accuracies": 0.625, "rewards/chosen": -0.15996044874191284, "rewards/margins": 1.0880732536315918, "rewards/rejected": -1.2480336427688599, "step": 3134 }, { "epoch": 0.36, "learning_rate": 1.9451012524874167e-07, "logits/chosen": -2.6734280586242676, "logits/rejected": -2.736083507537842, "logps/chosen": -314.3001708984375, "logps/rejected": -264.7532653808594, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": 0.44309449195861816, "rewards/margins": 2.706721544265747, "rewards/rejected": -2.26362681388855, "step": 3135 }, { "epoch": 0.36, "learning_rate": 1.944750087791174e-07, "logits/chosen": -3.4513635635375977, "logits/rejected": -3.6545827388763428, "logps/chosen": -214.97882080078125, "logps/rejected": -235.15771484375, "loss": 0.274, "rewards/accuracies": 0.875, "rewards/chosen": -0.026938125491142273, "rewards/margins": 2.167647361755371, "rewards/rejected": -2.1945855617523193, "step": 3136 }, { "epoch": 0.36, "learning_rate": 1.9443989230949315e-07, "logits/chosen": -2.933361530303955, "logits/rejected": -2.754458427429199, "logps/chosen": -197.4517364501953, "logps/rejected": -224.07888793945312, "loss": 0.273, "rewards/accuracies": 0.875, "rewards/chosen": 0.4978392422199249, "rewards/margins": 1.8580803871154785, "rewards/rejected": -1.360241174697876, "step": 3137 }, { "epoch": 0.36, "learning_rate": 1.9440477583986888e-07, "logits/chosen": -2.9276881217956543, "logits/rejected": -3.0481343269348145, "logps/chosen": -320.54998779296875, "logps/rejected": -163.0902557373047, "loss": 0.3918, "rewards/accuracies": 0.875, "rewards/chosen": -0.17877639830112457, "rewards/margins": 0.9948813319206238, "rewards/rejected": -1.173657774925232, "step": 3138 }, { "epoch": 0.36, "learning_rate": 1.9436965937024463e-07, "logits/chosen": -4.334774494171143, "logits/rejected": -3.926363945007324, "logps/chosen": -255.1059112548828, "logps/rejected": -215.308349609375, "loss": 0.482, "rewards/accuracies": 0.75, "rewards/chosen": -0.2157684564590454, "rewards/margins": 1.1830332279205322, "rewards/rejected": -1.398801565170288, "step": 3139 }, { "epoch": 0.36, "learning_rate": 1.9433454290062039e-07, "logits/chosen": -2.6784138679504395, "logits/rejected": -2.4433908462524414, "logps/chosen": -345.3836975097656, "logps/rejected": -345.91033935546875, "loss": 0.5368, "rewards/accuracies": 0.875, "rewards/chosen": -0.613369345664978, "rewards/margins": 1.7982168197631836, "rewards/rejected": -2.411586284637451, "step": 3140 }, { "epoch": 0.36, "learning_rate": 1.9429942643099611e-07, "logits/chosen": -2.617370128631592, "logits/rejected": -2.732225179672241, "logps/chosen": -253.665283203125, "logps/rejected": -193.13133239746094, "loss": 0.2801, "rewards/accuracies": 1.0, "rewards/chosen": 0.04861985146999359, "rewards/margins": 1.7302783727645874, "rewards/rejected": -1.681658387184143, "step": 3141 }, { "epoch": 0.36, "learning_rate": 1.9426430996137187e-07, "logits/chosen": -3.260280132293701, "logits/rejected": -3.184082508087158, "logps/chosen": -316.1820373535156, "logps/rejected": -195.85411071777344, "loss": 0.5302, "rewards/accuracies": 0.75, "rewards/chosen": -0.5305548906326294, "rewards/margins": 1.2422001361846924, "rewards/rejected": -1.7727551460266113, "step": 3142 }, { "epoch": 0.36, "learning_rate": 1.9422919349174762e-07, "logits/chosen": -3.3487188816070557, "logits/rejected": -3.4055697917938232, "logps/chosen": -252.9063720703125, "logps/rejected": -308.38653564453125, "loss": 0.3402, "rewards/accuracies": 0.875, "rewards/chosen": -0.11965171247720718, "rewards/margins": 1.3292714357376099, "rewards/rejected": -1.448923110961914, "step": 3143 }, { "epoch": 0.36, "learning_rate": 1.9419407702212335e-07, "logits/chosen": -2.840327739715576, "logits/rejected": -2.553577423095703, "logps/chosen": -277.79779052734375, "logps/rejected": -272.0838317871094, "loss": 0.7271, "rewards/accuracies": 0.75, "rewards/chosen": -0.07231229543685913, "rewards/margins": 0.6255306005477905, "rewards/rejected": -0.6978428959846497, "step": 3144 }, { "epoch": 0.36, "learning_rate": 1.9415896055249913e-07, "logits/chosen": -3.9872355461120605, "logits/rejected": -4.02241325378418, "logps/chosen": -267.6069030761719, "logps/rejected": -216.90277099609375, "loss": 0.4144, "rewards/accuracies": 0.75, "rewards/chosen": -0.31662413477897644, "rewards/margins": 1.290988564491272, "rewards/rejected": -1.6076128482818604, "step": 3145 }, { "epoch": 0.36, "learning_rate": 1.9412384408287486e-07, "logits/chosen": -2.714205265045166, "logits/rejected": -2.5255908966064453, "logps/chosen": -336.0819396972656, "logps/rejected": -185.05406188964844, "loss": 0.7168, "rewards/accuracies": 0.625, "rewards/chosen": -0.21295166015625, "rewards/margins": 0.35085803270339966, "rewards/rejected": -0.5638096928596497, "step": 3146 }, { "epoch": 0.36, "learning_rate": 1.940887276132506e-07, "logits/chosen": -2.165637493133545, "logits/rejected": -2.294268846511841, "logps/chosen": -397.3463134765625, "logps/rejected": -523.36279296875, "loss": 0.6833, "rewards/accuracies": 0.5, "rewards/chosen": -0.35481566190719604, "rewards/margins": 0.8367919325828552, "rewards/rejected": -1.1916077136993408, "step": 3147 }, { "epoch": 0.36, "learning_rate": 1.9405361114362636e-07, "logits/chosen": -2.8628482818603516, "logits/rejected": -2.515326499938965, "logps/chosen": -417.503173828125, "logps/rejected": -268.97613525390625, "loss": 0.402, "rewards/accuracies": 0.875, "rewards/chosen": -0.3466891348361969, "rewards/margins": 1.3490190505981445, "rewards/rejected": -1.6957082748413086, "step": 3148 }, { "epoch": 0.36, "learning_rate": 1.940184946740021e-07, "logits/chosen": -4.020050525665283, "logits/rejected": -3.744053602218628, "logps/chosen": -277.6413269042969, "logps/rejected": -253.1767578125, "loss": 0.2919, "rewards/accuracies": 0.875, "rewards/chosen": -0.4056010842323303, "rewards/margins": 2.1107869148254395, "rewards/rejected": -2.516387939453125, "step": 3149 }, { "epoch": 0.36, "learning_rate": 1.9398337820437785e-07, "logits/chosen": -2.723170042037964, "logits/rejected": -2.948158025741577, "logps/chosen": -256.3647155761719, "logps/rejected": -292.14044189453125, "loss": 0.2604, "rewards/accuracies": 0.875, "rewards/chosen": 0.15477971732616425, "rewards/margins": 1.7812004089355469, "rewards/rejected": -1.6264207363128662, "step": 3150 }, { "epoch": 0.36, "learning_rate": 1.939482617347536e-07, "logits/chosen": -3.267343521118164, "logits/rejected": -3.414912700653076, "logps/chosen": -303.08807373046875, "logps/rejected": -327.85272216796875, "loss": 0.371, "rewards/accuracies": 0.875, "rewards/chosen": 0.09791743755340576, "rewards/margins": 2.0082571506500244, "rewards/rejected": -1.910339593887329, "step": 3151 }, { "epoch": 0.36, "learning_rate": 1.9391314526512933e-07, "logits/chosen": -3.181365966796875, "logits/rejected": -3.0855886936187744, "logps/chosen": -278.80596923828125, "logps/rejected": -187.27603149414062, "loss": 0.2912, "rewards/accuracies": 0.875, "rewards/chosen": -0.1476357877254486, "rewards/margins": 1.5447094440460205, "rewards/rejected": -1.692345380783081, "step": 3152 }, { "epoch": 0.36, "learning_rate": 1.9387802879550508e-07, "logits/chosen": -2.430910587310791, "logits/rejected": -2.4548614025115967, "logps/chosen": -491.0486755371094, "logps/rejected": -325.77783203125, "loss": 0.8776, "rewards/accuracies": 0.75, "rewards/chosen": -0.7084676623344421, "rewards/margins": 0.39052680134773254, "rewards/rejected": -1.098994493484497, "step": 3153 }, { "epoch": 0.36, "learning_rate": 1.938429123258808e-07, "logits/chosen": -3.0407025814056396, "logits/rejected": -2.7807254791259766, "logps/chosen": -291.38995361328125, "logps/rejected": -177.69686889648438, "loss": 0.3467, "rewards/accuracies": 0.875, "rewards/chosen": 0.2521824240684509, "rewards/margins": 1.6012303829193115, "rewards/rejected": -1.3490480184555054, "step": 3154 }, { "epoch": 0.36, "learning_rate": 1.9380779585625656e-07, "logits/chosen": -2.9747557640075684, "logits/rejected": -2.933706283569336, "logps/chosen": -241.34112548828125, "logps/rejected": -180.63107299804688, "loss": 0.2545, "rewards/accuracies": 0.75, "rewards/chosen": 0.09966646134853363, "rewards/margins": 2.479768753051758, "rewards/rejected": -2.3801023960113525, "step": 3155 }, { "epoch": 0.36, "learning_rate": 1.9377267938663234e-07, "logits/chosen": -2.959115505218506, "logits/rejected": -3.2778992652893066, "logps/chosen": -270.78936767578125, "logps/rejected": -419.3648986816406, "loss": 0.445, "rewards/accuracies": 0.875, "rewards/chosen": -0.7614113092422485, "rewards/margins": 1.7615286111831665, "rewards/rejected": -2.522939920425415, "step": 3156 }, { "epoch": 0.36, "learning_rate": 1.9373756291700807e-07, "logits/chosen": -2.664050817489624, "logits/rejected": -2.7092819213867188, "logps/chosen": -196.298828125, "logps/rejected": -324.38543701171875, "loss": 0.3798, "rewards/accuracies": 0.75, "rewards/chosen": 0.13760371506214142, "rewards/margins": 1.2768279314041138, "rewards/rejected": -1.1392244100570679, "step": 3157 }, { "epoch": 0.36, "learning_rate": 1.9370244644738382e-07, "logits/chosen": -2.862964391708374, "logits/rejected": -3.007152557373047, "logps/chosen": -323.246826171875, "logps/rejected": -166.35379028320312, "loss": 0.4695, "rewards/accuracies": 0.75, "rewards/chosen": -0.010654762387275696, "rewards/margins": 1.7380387783050537, "rewards/rejected": -1.7486937046051025, "step": 3158 }, { "epoch": 0.36, "learning_rate": 1.9366732997775958e-07, "logits/chosen": -3.801161766052246, "logits/rejected": -3.822605609893799, "logps/chosen": -177.2981414794922, "logps/rejected": -164.15139770507812, "loss": 0.5274, "rewards/accuracies": 0.625, "rewards/chosen": -0.7948575019836426, "rewards/margins": 0.8318825960159302, "rewards/rejected": -1.6267402172088623, "step": 3159 }, { "epoch": 0.36, "learning_rate": 1.936322135081353e-07, "logits/chosen": -3.1773176193237305, "logits/rejected": -2.96525239944458, "logps/chosen": -365.242431640625, "logps/rejected": -307.3572692871094, "loss": 0.5885, "rewards/accuracies": 0.75, "rewards/chosen": 0.0967455580830574, "rewards/margins": 0.6376952528953552, "rewards/rejected": -0.5409497618675232, "step": 3160 }, { "epoch": 0.36, "learning_rate": 1.9359709703851106e-07, "logits/chosen": -3.4648280143737793, "logits/rejected": -3.7581849098205566, "logps/chosen": -357.2449951171875, "logps/rejected": -314.943115234375, "loss": 0.3883, "rewards/accuracies": 0.75, "rewards/chosen": -0.59961998462677, "rewards/margins": 1.2466429471969604, "rewards/rejected": -1.8462629318237305, "step": 3161 }, { "epoch": 0.36, "learning_rate": 1.9356198056888679e-07, "logits/chosen": -2.6313295364379883, "logits/rejected": -2.792285442352295, "logps/chosen": -135.06724548339844, "logps/rejected": -217.89158630371094, "loss": 0.6147, "rewards/accuracies": 0.5, "rewards/chosen": -0.0711510181427002, "rewards/margins": 0.39882540702819824, "rewards/rejected": -0.46997642517089844, "step": 3162 }, { "epoch": 0.36, "learning_rate": 1.9352686409926254e-07, "logits/chosen": -3.9241766929626465, "logits/rejected": -3.8566627502441406, "logps/chosen": -284.4398193359375, "logps/rejected": -352.3221435546875, "loss": 0.368, "rewards/accuracies": 0.875, "rewards/chosen": -0.5513325929641724, "rewards/margins": 2.3936877250671387, "rewards/rejected": -2.9450201988220215, "step": 3163 }, { "epoch": 0.36, "learning_rate": 1.934917476296383e-07, "logits/chosen": -3.068753480911255, "logits/rejected": -3.1875836849212646, "logps/chosen": -301.783203125, "logps/rejected": -233.98565673828125, "loss": 0.2578, "rewards/accuracies": 1.0, "rewards/chosen": 0.6921709775924683, "rewards/margins": 1.975440502166748, "rewards/rejected": -1.2832696437835693, "step": 3164 }, { "epoch": 0.36, "learning_rate": 1.9345663116001402e-07, "logits/chosen": -3.226672887802124, "logits/rejected": -3.318633556365967, "logps/chosen": -269.446533203125, "logps/rejected": -286.0015869140625, "loss": 0.2603, "rewards/accuracies": 0.875, "rewards/chosen": -0.26339155435562134, "rewards/margins": 1.6534026861190796, "rewards/rejected": -1.9167943000793457, "step": 3165 }, { "epoch": 0.36, "learning_rate": 1.9342151469038977e-07, "logits/chosen": -3.206882953643799, "logits/rejected": -3.2786176204681396, "logps/chosen": -173.13641357421875, "logps/rejected": -165.844482421875, "loss": 0.3851, "rewards/accuracies": 0.875, "rewards/chosen": 0.051384419202804565, "rewards/margins": 1.1042113304138184, "rewards/rejected": -1.052827000617981, "step": 3166 }, { "epoch": 0.37, "learning_rate": 1.9338639822076556e-07, "logits/chosen": -2.49845027923584, "logits/rejected": -2.826868772506714, "logps/chosen": -403.8288879394531, "logps/rejected": -257.8197021484375, "loss": 0.3645, "rewards/accuracies": 0.75, "rewards/chosen": 0.6386094093322754, "rewards/margins": 1.3163683414459229, "rewards/rejected": -0.6777588725090027, "step": 3167 }, { "epoch": 0.37, "learning_rate": 1.9335128175114128e-07, "logits/chosen": -3.104853868484497, "logits/rejected": -3.050532102584839, "logps/chosen": -351.00360107421875, "logps/rejected": -203.71531677246094, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": 0.30171069502830505, "rewards/margins": 2.229830741882324, "rewards/rejected": -1.9281201362609863, "step": 3168 }, { "epoch": 0.37, "learning_rate": 1.9331616528151704e-07, "logits/chosen": -2.3206558227539062, "logits/rejected": -2.5209836959838867, "logps/chosen": -318.3055725097656, "logps/rejected": -270.863037109375, "loss": 0.2066, "rewards/accuracies": 1.0, "rewards/chosen": 0.663703203201294, "rewards/margins": 2.0448946952819824, "rewards/rejected": -1.381191611289978, "step": 3169 }, { "epoch": 0.37, "learning_rate": 1.9328104881189276e-07, "logits/chosen": -2.5549418926239014, "logits/rejected": -2.7501368522644043, "logps/chosen": -204.6936798095703, "logps/rejected": -335.27996826171875, "loss": 0.5318, "rewards/accuracies": 0.625, "rewards/chosen": -0.3278293311595917, "rewards/margins": 1.1896642446517944, "rewards/rejected": -1.5174936056137085, "step": 3170 }, { "epoch": 0.37, "learning_rate": 1.9324593234226852e-07, "logits/chosen": -2.3297410011291504, "logits/rejected": -2.4476966857910156, "logps/chosen": -198.59457397460938, "logps/rejected": -205.01426696777344, "loss": 0.9792, "rewards/accuracies": 0.5, "rewards/chosen": -1.0319730043411255, "rewards/margins": -0.0736834704875946, "rewards/rejected": -0.958289623260498, "step": 3171 }, { "epoch": 0.37, "learning_rate": 1.9321081587264427e-07, "logits/chosen": -3.3408331871032715, "logits/rejected": -3.54144287109375, "logps/chosen": -156.85028076171875, "logps/rejected": -168.27459716796875, "loss": 0.557, "rewards/accuracies": 0.75, "rewards/chosen": -0.4681437611579895, "rewards/margins": 0.7855842113494873, "rewards/rejected": -1.2537280321121216, "step": 3172 }, { "epoch": 0.37, "learning_rate": 1.9317569940302e-07, "logits/chosen": -2.618577718734741, "logits/rejected": -2.7099623680114746, "logps/chosen": -251.7499237060547, "logps/rejected": -211.65570068359375, "loss": 0.2359, "rewards/accuracies": 0.875, "rewards/chosen": 0.5767526030540466, "rewards/margins": 3.0526134967803955, "rewards/rejected": -2.475860834121704, "step": 3173 }, { "epoch": 0.37, "learning_rate": 1.9314058293339575e-07, "logits/chosen": -2.828080177307129, "logits/rejected": -2.793627977371216, "logps/chosen": -323.42999267578125, "logps/rejected": -282.9430236816406, "loss": 0.3621, "rewards/accuracies": 0.875, "rewards/chosen": 0.18002460896968842, "rewards/margins": 1.5425063371658325, "rewards/rejected": -1.3624818325042725, "step": 3174 }, { "epoch": 0.37, "learning_rate": 1.931054664637715e-07, "logits/chosen": -2.6242740154266357, "logits/rejected": -2.8956551551818848, "logps/chosen": -252.06881713867188, "logps/rejected": -245.21841430664062, "loss": 0.2947, "rewards/accuracies": 0.875, "rewards/chosen": -0.3929525315761566, "rewards/margins": 1.5374438762664795, "rewards/rejected": -1.930396318435669, "step": 3175 }, { "epoch": 0.37, "learning_rate": 1.9307034999414723e-07, "logits/chosen": -3.0584704875946045, "logits/rejected": -3.1872775554656982, "logps/chosen": -237.08355712890625, "logps/rejected": -160.3933868408203, "loss": 0.5175, "rewards/accuracies": 0.5, "rewards/chosen": -0.09294568002223969, "rewards/margins": 1.720481038093567, "rewards/rejected": -1.8134267330169678, "step": 3176 }, { "epoch": 0.37, "learning_rate": 1.93035233524523e-07, "logits/chosen": -3.177743911743164, "logits/rejected": -3.0853703022003174, "logps/chosen": -126.09711456298828, "logps/rejected": -126.9111557006836, "loss": 0.6389, "rewards/accuracies": 0.75, "rewards/chosen": -0.13913212716579437, "rewards/margins": 0.45784538984298706, "rewards/rejected": -0.5969774723052979, "step": 3177 }, { "epoch": 0.37, "learning_rate": 1.9300011705489872e-07, "logits/chosen": -2.8288044929504395, "logits/rejected": -2.655651092529297, "logps/chosen": -429.07073974609375, "logps/rejected": -254.28530883789062, "loss": 0.1468, "rewards/accuracies": 1.0, "rewards/chosen": 0.5850380659103394, "rewards/margins": 2.4018473625183105, "rewards/rejected": -1.8168094158172607, "step": 3178 }, { "epoch": 0.37, "learning_rate": 1.929650005852745e-07, "logits/chosen": -3.537522077560425, "logits/rejected": -3.495020866394043, "logps/chosen": -222.0142364501953, "logps/rejected": -217.59652709960938, "loss": 0.8089, "rewards/accuracies": 0.5, "rewards/chosen": -1.0027263164520264, "rewards/margins": 0.10398638248443604, "rewards/rejected": -1.1067125797271729, "step": 3179 }, { "epoch": 0.37, "learning_rate": 1.9292988411565025e-07, "logits/chosen": -2.921863317489624, "logits/rejected": -2.872988224029541, "logps/chosen": -320.1180419921875, "logps/rejected": -260.6642761230469, "loss": 0.1433, "rewards/accuracies": 1.0, "rewards/chosen": -0.29767608642578125, "rewards/margins": 2.4440011978149414, "rewards/rejected": -2.7416772842407227, "step": 3180 }, { "epoch": 0.37, "learning_rate": 1.9289476764602598e-07, "logits/chosen": -3.510861873626709, "logits/rejected": -3.541411876678467, "logps/chosen": -232.81268310546875, "logps/rejected": -241.0210723876953, "loss": 0.2038, "rewards/accuracies": 0.875, "rewards/chosen": 0.2568121552467346, "rewards/margins": 2.2625627517700195, "rewards/rejected": -2.0057506561279297, "step": 3181 }, { "epoch": 0.37, "learning_rate": 1.9285965117640173e-07, "logits/chosen": -3.2083821296691895, "logits/rejected": -3.5233545303344727, "logps/chosen": -157.1383056640625, "logps/rejected": -169.42965698242188, "loss": 0.4002, "rewards/accuracies": 0.875, "rewards/chosen": -0.07776683568954468, "rewards/margins": 1.0177593231201172, "rewards/rejected": -1.095526099205017, "step": 3182 }, { "epoch": 0.37, "learning_rate": 1.9282453470677748e-07, "logits/chosen": -2.787046432495117, "logits/rejected": -2.9413928985595703, "logps/chosen": -157.8968505859375, "logps/rejected": -239.31356811523438, "loss": 0.3314, "rewards/accuracies": 0.875, "rewards/chosen": 0.2068593055009842, "rewards/margins": 1.6255970001220703, "rewards/rejected": -1.418737769126892, "step": 3183 }, { "epoch": 0.37, "learning_rate": 1.927894182371532e-07, "logits/chosen": -2.4852118492126465, "logits/rejected": -2.5678765773773193, "logps/chosen": -251.90386962890625, "logps/rejected": -297.1863708496094, "loss": 0.2944, "rewards/accuracies": 0.875, "rewards/chosen": -0.08986397087574005, "rewards/margins": 1.9219286441802979, "rewards/rejected": -2.0117926597595215, "step": 3184 }, { "epoch": 0.37, "learning_rate": 1.9275430176752897e-07, "logits/chosen": -2.8091835975646973, "logits/rejected": -3.001106023788452, "logps/chosen": -269.08966064453125, "logps/rejected": -262.691162109375, "loss": 0.2466, "rewards/accuracies": 1.0, "rewards/chosen": 0.13353192806243896, "rewards/margins": 2.324127674102783, "rewards/rejected": -2.1905956268310547, "step": 3185 }, { "epoch": 0.37, "learning_rate": 1.927191852979047e-07, "logits/chosen": -2.685307025909424, "logits/rejected": -3.134504795074463, "logps/chosen": -197.0176544189453, "logps/rejected": -163.26231384277344, "loss": 0.6256, "rewards/accuracies": 0.625, "rewards/chosen": 0.17461934685707092, "rewards/margins": 0.9495357275009155, "rewards/rejected": -0.7749163508415222, "step": 3186 }, { "epoch": 0.37, "learning_rate": 1.9268406882828045e-07, "logits/chosen": -3.203638792037964, "logits/rejected": -2.848695993423462, "logps/chosen": -126.86032104492188, "logps/rejected": -203.45050048828125, "loss": 0.3792, "rewards/accuracies": 0.875, "rewards/chosen": -0.7413145899772644, "rewards/margins": 0.9371476769447327, "rewards/rejected": -1.6784621477127075, "step": 3187 }, { "epoch": 0.37, "learning_rate": 1.926489523586562e-07, "logits/chosen": -2.9424662590026855, "logits/rejected": -3.1661322116851807, "logps/chosen": -183.78929138183594, "logps/rejected": -233.72328186035156, "loss": 0.2321, "rewards/accuracies": 1.0, "rewards/chosen": 0.22609272599220276, "rewards/margins": 2.498617172241211, "rewards/rejected": -2.272524356842041, "step": 3188 }, { "epoch": 0.37, "learning_rate": 1.9261383588903193e-07, "logits/chosen": -3.0470893383026123, "logits/rejected": -3.0329556465148926, "logps/chosen": -158.0448760986328, "logps/rejected": -215.769287109375, "loss": 0.3073, "rewards/accuracies": 0.875, "rewards/chosen": 2.4080276489257812e-05, "rewards/margins": 2.050638198852539, "rewards/rejected": -2.0506138801574707, "step": 3189 }, { "epoch": 0.37, "learning_rate": 1.925787194194077e-07, "logits/chosen": -3.0375590324401855, "logits/rejected": -3.0227859020233154, "logps/chosen": -310.0751647949219, "logps/rejected": -328.383544921875, "loss": 0.6103, "rewards/accuracies": 0.75, "rewards/chosen": -0.19008426368236542, "rewards/margins": 0.7001814246177673, "rewards/rejected": -0.8902656435966492, "step": 3190 }, { "epoch": 0.37, "learning_rate": 1.9254360294978344e-07, "logits/chosen": -2.3290019035339355, "logits/rejected": -2.441476345062256, "logps/chosen": -296.9434814453125, "logps/rejected": -315.8758544921875, "loss": 0.4435, "rewards/accuracies": 0.75, "rewards/chosen": 0.3532309830188751, "rewards/margins": 1.5018521547317505, "rewards/rejected": -1.1486213207244873, "step": 3191 }, { "epoch": 0.37, "learning_rate": 1.925084864801592e-07, "logits/chosen": -2.2796630859375, "logits/rejected": -2.489521026611328, "logps/chosen": -396.0833435058594, "logps/rejected": -218.4396209716797, "loss": 0.4627, "rewards/accuracies": 0.75, "rewards/chosen": -0.13185662031173706, "rewards/margins": 1.1018154621124268, "rewards/rejected": -1.233672022819519, "step": 3192 }, { "epoch": 0.37, "learning_rate": 1.9247337001053494e-07, "logits/chosen": -3.558927297592163, "logits/rejected": -3.430616855621338, "logps/chosen": -221.1109161376953, "logps/rejected": -241.5007781982422, "loss": 0.1454, "rewards/accuracies": 0.875, "rewards/chosen": 0.23434436321258545, "rewards/margins": 4.832926273345947, "rewards/rejected": -4.598581790924072, "step": 3193 }, { "epoch": 0.37, "learning_rate": 1.9243825354091067e-07, "logits/chosen": -3.2861223220825195, "logits/rejected": -3.1551432609558105, "logps/chosen": -99.10779571533203, "logps/rejected": -114.55242156982422, "loss": 0.3638, "rewards/accuracies": 0.875, "rewards/chosen": 0.14754851162433624, "rewards/margins": 1.79312264919281, "rewards/rejected": -1.6455740928649902, "step": 3194 }, { "epoch": 0.37, "learning_rate": 1.9240313707128642e-07, "logits/chosen": -2.242636203765869, "logits/rejected": -2.3370909690856934, "logps/chosen": -451.64892578125, "logps/rejected": -248.69992065429688, "loss": 0.2934, "rewards/accuracies": 0.875, "rewards/chosen": 0.08344341814517975, "rewards/margins": 1.6113696098327637, "rewards/rejected": -1.5279263257980347, "step": 3195 }, { "epoch": 0.37, "learning_rate": 1.9236802060166218e-07, "logits/chosen": -3.7533931732177734, "logits/rejected": -3.269853115081787, "logps/chosen": -384.52545166015625, "logps/rejected": -331.7982177734375, "loss": 0.3781, "rewards/accuracies": 0.875, "rewards/chosen": 0.025455381721258163, "rewards/margins": 1.0872496366500854, "rewards/rejected": -1.0617942810058594, "step": 3196 }, { "epoch": 0.37, "learning_rate": 1.923329041320379e-07, "logits/chosen": -3.019721746444702, "logits/rejected": -3.1479649543762207, "logps/chosen": -210.34909057617188, "logps/rejected": -228.738525390625, "loss": 0.3425, "rewards/accuracies": 0.875, "rewards/chosen": 0.15386933088302612, "rewards/margins": 1.859704613685608, "rewards/rejected": -1.7058351039886475, "step": 3197 }, { "epoch": 0.37, "learning_rate": 1.9229778766241366e-07, "logits/chosen": -2.7576911449432373, "logits/rejected": -2.8671483993530273, "logps/chosen": -296.6002502441406, "logps/rejected": -192.82598876953125, "loss": 0.3087, "rewards/accuracies": 0.875, "rewards/chosen": 0.21972621977329254, "rewards/margins": 1.6246566772460938, "rewards/rejected": -1.4049303531646729, "step": 3198 }, { "epoch": 0.37, "learning_rate": 1.922626711927894e-07, "logits/chosen": -3.7675492763519287, "logits/rejected": -3.9964122772216797, "logps/chosen": -176.26800537109375, "logps/rejected": -227.17747497558594, "loss": 0.2093, "rewards/accuracies": 0.875, "rewards/chosen": 0.31062066555023193, "rewards/margins": 2.37013578414917, "rewards/rejected": -2.0595154762268066, "step": 3199 }, { "epoch": 0.37, "learning_rate": 1.9222755472316514e-07, "logits/chosen": -3.2625598907470703, "logits/rejected": -3.0744194984436035, "logps/chosen": -394.7248229980469, "logps/rejected": -332.4236145019531, "loss": 0.3813, "rewards/accuracies": 0.875, "rewards/chosen": -0.12685956060886383, "rewards/margins": 1.0682594776153564, "rewards/rejected": -1.1951191425323486, "step": 3200 }, { "epoch": 0.37, "learning_rate": 1.9219243825354092e-07, "logits/chosen": -3.2528815269470215, "logits/rejected": -3.1706550121307373, "logps/chosen": -351.19891357421875, "logps/rejected": -313.12933349609375, "loss": 0.4284, "rewards/accuracies": 0.75, "rewards/chosen": -0.53228759765625, "rewards/margins": 1.420660138130188, "rewards/rejected": -1.9529476165771484, "step": 3201 }, { "epoch": 0.37, "learning_rate": 1.9215732178391665e-07, "logits/chosen": -3.1220457553863525, "logits/rejected": -3.3700711727142334, "logps/chosen": -251.88401794433594, "logps/rejected": -196.0518798828125, "loss": 0.4355, "rewards/accuracies": 0.625, "rewards/chosen": -0.6162028312683105, "rewards/margins": 1.6659945249557495, "rewards/rejected": -2.2821974754333496, "step": 3202 }, { "epoch": 0.37, "learning_rate": 1.921222053142924e-07, "logits/chosen": -3.1230826377868652, "logits/rejected": -2.8391857147216797, "logps/chosen": -381.2209777832031, "logps/rejected": -312.4192810058594, "loss": 0.2594, "rewards/accuracies": 0.875, "rewards/chosen": 0.07837282121181488, "rewards/margins": 2.351102828979492, "rewards/rejected": -2.2727298736572266, "step": 3203 }, { "epoch": 0.37, "learning_rate": 1.9208708884466816e-07, "logits/chosen": -3.2833290100097656, "logits/rejected": -3.3133749961853027, "logps/chosen": -214.62765502929688, "logps/rejected": -201.61557006835938, "loss": 0.5971, "rewards/accuracies": 0.75, "rewards/chosen": -0.06315914541482925, "rewards/margins": 1.0348143577575684, "rewards/rejected": -1.0979735851287842, "step": 3204 }, { "epoch": 0.37, "learning_rate": 1.9205197237504388e-07, "logits/chosen": -2.581875801086426, "logits/rejected": -2.335019111633301, "logps/chosen": -262.3916015625, "logps/rejected": -333.5521545410156, "loss": 0.6641, "rewards/accuracies": 0.625, "rewards/chosen": -3.2767653465270996e-05, "rewards/margins": 0.36479470133781433, "rewards/rejected": -0.3648275136947632, "step": 3205 }, { "epoch": 0.37, "learning_rate": 1.9201685590541964e-07, "logits/chosen": -2.985363245010376, "logits/rejected": -2.973149538040161, "logps/chosen": -403.019287109375, "logps/rejected": -222.37109375, "loss": 0.4155, "rewards/accuracies": 0.625, "rewards/chosen": -0.7481920719146729, "rewards/margins": 1.3837943077087402, "rewards/rejected": -2.131986141204834, "step": 3206 }, { "epoch": 0.37, "learning_rate": 1.9198173943579537e-07, "logits/chosen": -2.712921380996704, "logits/rejected": -2.682893753051758, "logps/chosen": -296.4322814941406, "logps/rejected": -310.62030029296875, "loss": 0.263, "rewards/accuracies": 1.0, "rewards/chosen": -0.3677060306072235, "rewards/margins": 2.4026906490325928, "rewards/rejected": -2.7703967094421387, "step": 3207 }, { "epoch": 0.37, "learning_rate": 1.9194662296617112e-07, "logits/chosen": -3.2227590084075928, "logits/rejected": -3.538888454437256, "logps/chosen": -179.6095428466797, "logps/rejected": -202.28724670410156, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.1451857089996338, "rewards/margins": 2.2049641609191895, "rewards/rejected": -2.0597784519195557, "step": 3208 }, { "epoch": 0.37, "learning_rate": 1.9191150649654687e-07, "logits/chosen": -2.466966390609741, "logits/rejected": -2.716768980026245, "logps/chosen": -297.9671630859375, "logps/rejected": -233.03448486328125, "loss": 0.4269, "rewards/accuracies": 0.875, "rewards/chosen": 0.23534274101257324, "rewards/margins": 1.1087921857833862, "rewards/rejected": -0.873449444770813, "step": 3209 }, { "epoch": 0.37, "learning_rate": 1.918763900269226e-07, "logits/chosen": -2.7078092098236084, "logits/rejected": -2.448582649230957, "logps/chosen": -291.03289794921875, "logps/rejected": -255.4500732421875, "loss": 0.273, "rewards/accuracies": 1.0, "rewards/chosen": 0.10297869145870209, "rewards/margins": 1.7684916257858276, "rewards/rejected": -1.665513038635254, "step": 3210 }, { "epoch": 0.37, "learning_rate": 1.9184127355729835e-07, "logits/chosen": -3.6057567596435547, "logits/rejected": -3.57731294631958, "logps/chosen": -196.20156860351562, "logps/rejected": -94.17867279052734, "loss": 0.5884, "rewards/accuracies": 0.5, "rewards/chosen": -0.2778235077857971, "rewards/margins": 0.842933714389801, "rewards/rejected": -1.1207573413848877, "step": 3211 }, { "epoch": 0.37, "learning_rate": 1.9180615708767413e-07, "logits/chosen": -2.54561448097229, "logits/rejected": -2.239021062850952, "logps/chosen": -292.28204345703125, "logps/rejected": -340.16009521484375, "loss": 0.1683, "rewards/accuracies": 0.875, "rewards/chosen": 0.1486508995294571, "rewards/margins": 2.8863699436187744, "rewards/rejected": -2.7377190589904785, "step": 3212 }, { "epoch": 0.37, "learning_rate": 1.9177104061804986e-07, "logits/chosen": -3.4201903343200684, "logits/rejected": -3.356454849243164, "logps/chosen": -123.1936264038086, "logps/rejected": -186.9005889892578, "loss": 0.3934, "rewards/accuracies": 0.75, "rewards/chosen": -0.5036709904670715, "rewards/margins": 1.2099732160568237, "rewards/rejected": -1.71364426612854, "step": 3213 }, { "epoch": 0.37, "learning_rate": 1.9173592414842562e-07, "logits/chosen": -3.868863105773926, "logits/rejected": -3.5191736221313477, "logps/chosen": -274.5214538574219, "logps/rejected": -279.2172546386719, "loss": 0.4968, "rewards/accuracies": 0.625, "rewards/chosen": 0.4927408695220947, "rewards/margins": 0.8367164731025696, "rewards/rejected": -0.3439755439758301, "step": 3214 }, { "epoch": 0.37, "learning_rate": 1.9170080767880134e-07, "logits/chosen": -3.4469943046569824, "logits/rejected": -3.4305765628814697, "logps/chosen": -79.89260864257812, "logps/rejected": -150.68055725097656, "loss": 0.4172, "rewards/accuracies": 0.875, "rewards/chosen": 0.3113442659378052, "rewards/margins": 1.696946144104004, "rewards/rejected": -1.3856017589569092, "step": 3215 }, { "epoch": 0.37, "learning_rate": 1.916656912091771e-07, "logits/chosen": -3.140932083129883, "logits/rejected": -3.209458827972412, "logps/chosen": -453.2861633300781, "logps/rejected": -381.22442626953125, "loss": 0.1949, "rewards/accuracies": 0.875, "rewards/chosen": 0.42390185594558716, "rewards/margins": 2.575244903564453, "rewards/rejected": -2.1513431072235107, "step": 3216 }, { "epoch": 0.37, "learning_rate": 1.9163057473955285e-07, "logits/chosen": -2.546959161758423, "logits/rejected": -2.6946377754211426, "logps/chosen": -317.20208740234375, "logps/rejected": -279.99334716796875, "loss": 0.3014, "rewards/accuracies": 0.875, "rewards/chosen": -0.1507071554660797, "rewards/margins": 2.0866451263427734, "rewards/rejected": -2.237352132797241, "step": 3217 }, { "epoch": 0.37, "learning_rate": 1.9159545826992858e-07, "logits/chosen": -3.326563835144043, "logits/rejected": -3.102933883666992, "logps/chosen": -295.2079772949219, "logps/rejected": -267.52801513671875, "loss": 0.4642, "rewards/accuracies": 0.875, "rewards/chosen": -0.4555734395980835, "rewards/margins": 1.3516632318496704, "rewards/rejected": -1.8072367906570435, "step": 3218 }, { "epoch": 0.37, "learning_rate": 1.9156034180030433e-07, "logits/chosen": -3.2117018699645996, "logits/rejected": -2.8602612018585205, "logps/chosen": -407.1167907714844, "logps/rejected": -316.2675476074219, "loss": 0.3929, "rewards/accuracies": 0.875, "rewards/chosen": -0.6173150539398193, "rewards/margins": 1.251056432723999, "rewards/rejected": -1.8683714866638184, "step": 3219 }, { "epoch": 0.37, "learning_rate": 1.9152522533068009e-07, "logits/chosen": -3.4387295246124268, "logits/rejected": -3.4039721488952637, "logps/chosen": -483.8855895996094, "logps/rejected": -264.09417724609375, "loss": 0.2339, "rewards/accuracies": 0.875, "rewards/chosen": 0.531195342540741, "rewards/margins": 2.5248587131500244, "rewards/rejected": -1.9936631917953491, "step": 3220 }, { "epoch": 0.37, "learning_rate": 1.9149010886105581e-07, "logits/chosen": -3.2351975440979004, "logits/rejected": -3.429494619369507, "logps/chosen": -119.4773178100586, "logps/rejected": -181.87091064453125, "loss": 0.3195, "rewards/accuracies": 0.875, "rewards/chosen": 0.08006696403026581, "rewards/margins": 2.6682989597320557, "rewards/rejected": -2.5882320404052734, "step": 3221 }, { "epoch": 0.37, "learning_rate": 1.9145499239143157e-07, "logits/chosen": -2.8644163608551025, "logits/rejected": -2.9814255237579346, "logps/chosen": -305.98089599609375, "logps/rejected": -212.46640014648438, "loss": 0.505, "rewards/accuracies": 0.75, "rewards/chosen": -0.2272948920726776, "rewards/margins": 1.0894325971603394, "rewards/rejected": -1.3167275190353394, "step": 3222 }, { "epoch": 0.37, "learning_rate": 1.914198759218073e-07, "logits/chosen": -3.14992618560791, "logits/rejected": -3.4204845428466797, "logps/chosen": -352.255126953125, "logps/rejected": -361.21490478515625, "loss": 0.5094, "rewards/accuracies": 0.625, "rewards/chosen": 0.2552977204322815, "rewards/margins": 0.9476374387741089, "rewards/rejected": -0.6923396587371826, "step": 3223 }, { "epoch": 0.37, "learning_rate": 1.9138475945218307e-07, "logits/chosen": -2.758504629135132, "logits/rejected": -2.6833691596984863, "logps/chosen": -588.2627563476562, "logps/rejected": -384.4261169433594, "loss": 0.3157, "rewards/accuracies": 0.875, "rewards/chosen": 0.1297607421875, "rewards/margins": 1.599515438079834, "rewards/rejected": -1.4697548151016235, "step": 3224 }, { "epoch": 0.37, "learning_rate": 1.9134964298255883e-07, "logits/chosen": -3.8217179775238037, "logits/rejected": -3.8644299507141113, "logps/chosen": -186.1505126953125, "logps/rejected": -173.45523071289062, "loss": 0.4233, "rewards/accuracies": 0.875, "rewards/chosen": -0.18376606702804565, "rewards/margins": 1.1243703365325928, "rewards/rejected": -1.3081363439559937, "step": 3225 }, { "epoch": 0.37, "learning_rate": 1.9131452651293456e-07, "logits/chosen": -3.2060937881469727, "logits/rejected": -3.176862955093384, "logps/chosen": -286.36358642578125, "logps/rejected": -277.86297607421875, "loss": 0.7765, "rewards/accuracies": 0.625, "rewards/chosen": -0.6222411394119263, "rewards/margins": 0.7616034746170044, "rewards/rejected": -1.3838446140289307, "step": 3226 }, { "epoch": 0.37, "learning_rate": 1.912794100433103e-07, "logits/chosen": -2.6943438053131104, "logits/rejected": -2.6788463592529297, "logps/chosen": -406.9605712890625, "logps/rejected": -218.1900634765625, "loss": 0.384, "rewards/accuracies": 0.75, "rewards/chosen": 0.1339031159877777, "rewards/margins": 1.5504209995269775, "rewards/rejected": -1.416517972946167, "step": 3227 }, { "epoch": 0.37, "learning_rate": 1.9124429357368606e-07, "logits/chosen": -3.1584818363189697, "logits/rejected": -3.2305140495300293, "logps/chosen": -276.3624572753906, "logps/rejected": -271.6645202636719, "loss": 0.3265, "rewards/accuracies": 0.875, "rewards/chosen": -0.3623605966567993, "rewards/margins": 2.2047200202941895, "rewards/rejected": -2.5670809745788574, "step": 3228 }, { "epoch": 0.37, "learning_rate": 1.912091771040618e-07, "logits/chosen": -3.479188919067383, "logits/rejected": -3.4197165966033936, "logps/chosen": -142.38247680664062, "logps/rejected": -176.1259765625, "loss": 0.4916, "rewards/accuracies": 0.625, "rewards/chosen": -0.43982207775115967, "rewards/margins": 2.0081262588500977, "rewards/rejected": -2.447948455810547, "step": 3229 }, { "epoch": 0.37, "learning_rate": 1.9117406063443755e-07, "logits/chosen": -3.4238433837890625, "logits/rejected": -2.7327935695648193, "logps/chosen": -302.55841064453125, "logps/rejected": -130.51016235351562, "loss": 0.4101, "rewards/accuracies": 0.75, "rewards/chosen": -0.17379309237003326, "rewards/margins": 1.0282922983169556, "rewards/rejected": -1.2020853757858276, "step": 3230 }, { "epoch": 0.37, "learning_rate": 1.9113894416481327e-07, "logits/chosen": -2.588366746902466, "logits/rejected": -2.4745826721191406, "logps/chosen": -284.635498046875, "logps/rejected": -244.81593322753906, "loss": 0.516, "rewards/accuracies": 0.75, "rewards/chosen": -0.29966822266578674, "rewards/margins": 1.046379566192627, "rewards/rejected": -1.3460476398468018, "step": 3231 }, { "epoch": 0.37, "learning_rate": 1.9110382769518903e-07, "logits/chosen": -3.3810057640075684, "logits/rejected": -3.381730794906616, "logps/chosen": -173.1231689453125, "logps/rejected": -182.25955200195312, "loss": 0.6605, "rewards/accuracies": 0.875, "rewards/chosen": -0.060542285442352295, "rewards/margins": 0.6775486469268799, "rewards/rejected": -0.7380909323692322, "step": 3232 }, { "epoch": 0.37, "learning_rate": 1.910687112255648e-07, "logits/chosen": -3.0645012855529785, "logits/rejected": -3.495469093322754, "logps/chosen": -328.2294006347656, "logps/rejected": -399.32452392578125, "loss": 0.3123, "rewards/accuracies": 0.875, "rewards/chosen": -0.2017148733139038, "rewards/margins": 1.4950721263885498, "rewards/rejected": -1.6967869997024536, "step": 3233 }, { "epoch": 0.37, "learning_rate": 1.910335947559405e-07, "logits/chosen": -3.646066904067993, "logits/rejected": -3.6076340675354004, "logps/chosen": -120.30828857421875, "logps/rejected": -193.1534881591797, "loss": 0.2496, "rewards/accuracies": 1.0, "rewards/chosen": 0.33989855647087097, "rewards/margins": 2.6305253505706787, "rewards/rejected": -2.2906270027160645, "step": 3234 }, { "epoch": 0.37, "learning_rate": 1.909984782863163e-07, "logits/chosen": -3.2925965785980225, "logits/rejected": -2.952096462249756, "logps/chosen": -303.0674743652344, "logps/rejected": -432.8258056640625, "loss": 0.2326, "rewards/accuracies": 1.0, "rewards/chosen": 0.15167789161205292, "rewards/margins": 1.9019863605499268, "rewards/rejected": -1.7503085136413574, "step": 3235 }, { "epoch": 0.37, "learning_rate": 1.9096336181669202e-07, "logits/chosen": -2.8985912799835205, "logits/rejected": -2.9279794692993164, "logps/chosen": -149.3248291015625, "logps/rejected": -259.2939453125, "loss": 0.3726, "rewards/accuracies": 0.75, "rewards/chosen": -0.5507509708404541, "rewards/margins": 2.6153724193573, "rewards/rejected": -3.166123390197754, "step": 3236 }, { "epoch": 0.37, "learning_rate": 1.9092824534706777e-07, "logits/chosen": -2.7447702884674072, "logits/rejected": -2.529634714126587, "logps/chosen": -372.3981018066406, "logps/rejected": -370.15618896484375, "loss": 0.2146, "rewards/accuracies": 1.0, "rewards/chosen": -0.18551182746887207, "rewards/margins": 1.5035548210144043, "rewards/rejected": -1.6890666484832764, "step": 3237 }, { "epoch": 0.37, "learning_rate": 1.9089312887744352e-07, "logits/chosen": -3.1495883464813232, "logits/rejected": -3.0153558254241943, "logps/chosen": -330.2779541015625, "logps/rejected": -249.9334716796875, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": -0.2555235028266907, "rewards/margins": 1.4679946899414062, "rewards/rejected": -1.7235183715820312, "step": 3238 }, { "epoch": 0.37, "learning_rate": 1.9085801240781925e-07, "logits/chosen": -2.6077158451080322, "logits/rejected": -2.6612613201141357, "logps/chosen": -169.96072387695312, "logps/rejected": -207.2468719482422, "loss": 0.488, "rewards/accuracies": 0.875, "rewards/chosen": -0.10723993182182312, "rewards/margins": 0.9653011560440063, "rewards/rejected": -1.0725409984588623, "step": 3239 }, { "epoch": 0.37, "learning_rate": 1.90822895938195e-07, "logits/chosen": -2.372718572616577, "logits/rejected": -2.3203091621398926, "logps/chosen": -183.3533172607422, "logps/rejected": -325.9496154785156, "loss": 0.2134, "rewards/accuracies": 1.0, "rewards/chosen": -0.19972002506256104, "rewards/margins": 2.1437699794769287, "rewards/rejected": -2.3434898853302, "step": 3240 }, { "epoch": 0.37, "learning_rate": 1.9078777946857076e-07, "logits/chosen": -3.1283442974090576, "logits/rejected": -3.0133469104766846, "logps/chosen": -245.3660430908203, "logps/rejected": -273.9266662597656, "loss": 0.6642, "rewards/accuracies": 0.625, "rewards/chosen": -0.6120586395263672, "rewards/margins": 0.6677566766738892, "rewards/rejected": -1.2798153162002563, "step": 3241 }, { "epoch": 0.37, "learning_rate": 1.9075266299894649e-07, "logits/chosen": -3.1479129791259766, "logits/rejected": -3.1463518142700195, "logps/chosen": -449.2508544921875, "logps/rejected": -273.6101379394531, "loss": 0.1929, "rewards/accuracies": 0.875, "rewards/chosen": 0.22827793657779694, "rewards/margins": 2.315734624862671, "rewards/rejected": -2.087456703186035, "step": 3242 }, { "epoch": 0.37, "learning_rate": 1.9071754652932224e-07, "logits/chosen": -3.03220534324646, "logits/rejected": -3.182962417602539, "logps/chosen": -253.18397521972656, "logps/rejected": -317.7624816894531, "loss": 0.2952, "rewards/accuracies": 0.875, "rewards/chosen": 0.0026053711771965027, "rewards/margins": 2.8622312545776367, "rewards/rejected": -2.859625816345215, "step": 3243 }, { "epoch": 0.37, "learning_rate": 1.9068243005969797e-07, "logits/chosen": -2.888859510421753, "logits/rejected": -3.1244959831237793, "logps/chosen": -241.65017700195312, "logps/rejected": -349.09881591796875, "loss": 0.2942, "rewards/accuracies": 0.875, "rewards/chosen": -0.016743693500757217, "rewards/margins": 3.190858840942383, "rewards/rejected": -3.2076027393341064, "step": 3244 }, { "epoch": 0.37, "learning_rate": 1.9064731359007372e-07, "logits/chosen": -2.896139144897461, "logits/rejected": -2.836839437484741, "logps/chosen": -331.42730712890625, "logps/rejected": -174.92251586914062, "loss": 0.3901, "rewards/accuracies": 0.75, "rewards/chosen": -0.18573100864887238, "rewards/margins": 1.3835103511810303, "rewards/rejected": -1.5692414045333862, "step": 3245 }, { "epoch": 0.37, "learning_rate": 1.906121971204495e-07, "logits/chosen": -3.3594613075256348, "logits/rejected": -3.548163414001465, "logps/chosen": -112.77045440673828, "logps/rejected": -197.39797973632812, "loss": 0.2408, "rewards/accuracies": 1.0, "rewards/chosen": 0.3001018166542053, "rewards/margins": 1.817267894744873, "rewards/rejected": -1.5171661376953125, "step": 3246 }, { "epoch": 0.37, "learning_rate": 1.9057708065082523e-07, "logits/chosen": -3.5678553581237793, "logits/rejected": -3.436237335205078, "logps/chosen": -267.3758544921875, "logps/rejected": -264.5841369628906, "loss": 0.6619, "rewards/accuracies": 0.625, "rewards/chosen": -0.5816099047660828, "rewards/margins": 1.607773780822754, "rewards/rejected": -2.1893835067749023, "step": 3247 }, { "epoch": 0.37, "learning_rate": 1.9054196418120098e-07, "logits/chosen": -2.9175918102264404, "logits/rejected": -2.8050460815429688, "logps/chosen": -126.16608428955078, "logps/rejected": -160.1228790283203, "loss": 0.3407, "rewards/accuracies": 0.875, "rewards/chosen": -0.20938964188098907, "rewards/margins": 1.9538230895996094, "rewards/rejected": -2.163212776184082, "step": 3248 }, { "epoch": 0.37, "learning_rate": 1.9050684771157674e-07, "logits/chosen": -3.511631965637207, "logits/rejected": -3.0988759994506836, "logps/chosen": -118.99443054199219, "logps/rejected": -158.79519653320312, "loss": 0.5502, "rewards/accuracies": 0.75, "rewards/chosen": 0.25886887311935425, "rewards/margins": 0.9568430185317993, "rewards/rejected": -0.6979742050170898, "step": 3249 }, { "epoch": 0.37, "learning_rate": 1.9047173124195246e-07, "logits/chosen": -3.471848964691162, "logits/rejected": -3.287520408630371, "logps/chosen": -266.87237548828125, "logps/rejected": -211.9257354736328, "loss": 0.2189, "rewards/accuracies": 1.0, "rewards/chosen": 0.03501468151807785, "rewards/margins": 1.7859731912612915, "rewards/rejected": -1.7509586811065674, "step": 3250 }, { "epoch": 0.37, "learning_rate": 1.9043661477232822e-07, "logits/chosen": -3.8052945137023926, "logits/rejected": -3.6344809532165527, "logps/chosen": -230.63682556152344, "logps/rejected": -255.5173797607422, "loss": 0.2833, "rewards/accuracies": 0.875, "rewards/chosen": 0.2045193910598755, "rewards/margins": 3.4252254962921143, "rewards/rejected": -3.2207062244415283, "step": 3251 }, { "epoch": 0.37, "learning_rate": 1.9040149830270394e-07, "logits/chosen": -2.6787590980529785, "logits/rejected": -2.616711139678955, "logps/chosen": -348.7457275390625, "logps/rejected": -270.74237060546875, "loss": 0.2097, "rewards/accuracies": 1.0, "rewards/chosen": -0.19256411492824554, "rewards/margins": 1.8544222116470337, "rewards/rejected": -2.0469861030578613, "step": 3252 }, { "epoch": 0.38, "learning_rate": 1.903663818330797e-07, "logits/chosen": -3.0587100982666016, "logits/rejected": -3.1048736572265625, "logps/chosen": -184.961669921875, "logps/rejected": -290.9366149902344, "loss": 0.2589, "rewards/accuracies": 0.75, "rewards/chosen": 0.23616443574428558, "rewards/margins": 3.8599915504455566, "rewards/rejected": -3.6238269805908203, "step": 3253 }, { "epoch": 0.38, "learning_rate": 1.9033126536345545e-07, "logits/chosen": -3.1983935832977295, "logits/rejected": -3.4304795265197754, "logps/chosen": -163.0640411376953, "logps/rejected": -186.475341796875, "loss": 0.658, "rewards/accuracies": 0.375, "rewards/chosen": -0.5146329998970032, "rewards/margins": 1.099518060684204, "rewards/rejected": -1.6141510009765625, "step": 3254 }, { "epoch": 0.38, "learning_rate": 1.9029614889383118e-07, "logits/chosen": -2.4606666564941406, "logits/rejected": -2.5099620819091797, "logps/chosen": -309.9217529296875, "logps/rejected": -276.42474365234375, "loss": 0.349, "rewards/accuracies": 1.0, "rewards/chosen": -0.22629261016845703, "rewards/margins": 1.2124695777893066, "rewards/rejected": -1.4387621879577637, "step": 3255 }, { "epoch": 0.38, "learning_rate": 1.9026103242420693e-07, "logits/chosen": -3.728325128555298, "logits/rejected": -3.4861292839050293, "logps/chosen": -186.12265014648438, "logps/rejected": -196.19287109375, "loss": 0.4437, "rewards/accuracies": 0.875, "rewards/chosen": -0.6350020170211792, "rewards/margins": 2.339303970336914, "rewards/rejected": -2.974306106567383, "step": 3256 }, { "epoch": 0.38, "learning_rate": 1.9022591595458271e-07, "logits/chosen": -3.241154193878174, "logits/rejected": -3.0962798595428467, "logps/chosen": -413.33782958984375, "logps/rejected": -260.85430908203125, "loss": 0.1589, "rewards/accuracies": 1.0, "rewards/chosen": 0.6960663795471191, "rewards/margins": 2.5299081802368164, "rewards/rejected": -1.8338415622711182, "step": 3257 }, { "epoch": 0.38, "learning_rate": 1.9019079948495844e-07, "logits/chosen": -2.176119804382324, "logits/rejected": -2.141174077987671, "logps/chosen": -394.9962158203125, "logps/rejected": -331.6242370605469, "loss": 0.233, "rewards/accuracies": 0.875, "rewards/chosen": 0.6488301157951355, "rewards/margins": 1.909548044204712, "rewards/rejected": -1.2607176303863525, "step": 3258 }, { "epoch": 0.38, "learning_rate": 1.901556830153342e-07, "logits/chosen": -3.1232521533966064, "logits/rejected": -3.1128177642822266, "logps/chosen": -162.72921752929688, "logps/rejected": -283.23638916015625, "loss": 0.3529, "rewards/accuracies": 0.875, "rewards/chosen": 0.015923619270324707, "rewards/margins": 2.4470670223236084, "rewards/rejected": -2.431143283843994, "step": 3259 }, { "epoch": 0.38, "learning_rate": 1.9012056654570992e-07, "logits/chosen": -3.2230913639068604, "logits/rejected": -3.1792685985565186, "logps/chosen": -159.42027282714844, "logps/rejected": -262.39483642578125, "loss": 0.5293, "rewards/accuracies": 0.875, "rewards/chosen": -0.43209201097488403, "rewards/margins": 0.7172941565513611, "rewards/rejected": -1.1493862867355347, "step": 3260 }, { "epoch": 0.38, "learning_rate": 1.9008545007608568e-07, "logits/chosen": -3.58026385307312, "logits/rejected": -3.494145154953003, "logps/chosen": -169.50152587890625, "logps/rejected": -193.07992553710938, "loss": 0.2871, "rewards/accuracies": 1.0, "rewards/chosen": -0.615800678730011, "rewards/margins": 1.4299113750457764, "rewards/rejected": -2.0457119941711426, "step": 3261 }, { "epoch": 0.38, "learning_rate": 1.9005033360646143e-07, "logits/chosen": -2.9540281295776367, "logits/rejected": -3.2339887619018555, "logps/chosen": -383.470947265625, "logps/rejected": -237.59872436523438, "loss": 0.259, "rewards/accuracies": 0.875, "rewards/chosen": 0.30963021516799927, "rewards/margins": 2.005920171737671, "rewards/rejected": -1.6962900161743164, "step": 3262 }, { "epoch": 0.38, "learning_rate": 1.9001521713683716e-07, "logits/chosen": -3.5184195041656494, "logits/rejected": -3.2213165760040283, "logps/chosen": -282.95556640625, "logps/rejected": -244.77105712890625, "loss": 0.5145, "rewards/accuracies": 0.625, "rewards/chosen": -0.09156637638807297, "rewards/margins": 0.8782737851142883, "rewards/rejected": -0.9698401689529419, "step": 3263 }, { "epoch": 0.38, "learning_rate": 1.899801006672129e-07, "logits/chosen": -2.4168806076049805, "logits/rejected": -2.424252986907959, "logps/chosen": -339.12725830078125, "logps/rejected": -286.3655700683594, "loss": 0.5886, "rewards/accuracies": 0.5, "rewards/chosen": -0.5175842046737671, "rewards/margins": 0.7221792936325073, "rewards/rejected": -1.2397634983062744, "step": 3264 }, { "epoch": 0.38, "learning_rate": 1.8994498419758867e-07, "logits/chosen": -2.749178409576416, "logits/rejected": -2.38816499710083, "logps/chosen": -572.865478515625, "logps/rejected": -271.8323974609375, "loss": 0.3617, "rewards/accuracies": 0.75, "rewards/chosen": 0.689293384552002, "rewards/margins": 1.9038830995559692, "rewards/rejected": -1.2145897150039673, "step": 3265 }, { "epoch": 0.38, "learning_rate": 1.899098677279644e-07, "logits/chosen": -3.4367265701293945, "logits/rejected": -3.70025634765625, "logps/chosen": -63.923343658447266, "logps/rejected": -142.77394104003906, "loss": 0.3483, "rewards/accuracies": 0.75, "rewards/chosen": 0.06515637040138245, "rewards/margins": 1.7900564670562744, "rewards/rejected": -1.7249001264572144, "step": 3266 }, { "epoch": 0.38, "learning_rate": 1.8987475125834017e-07, "logits/chosen": -2.6999218463897705, "logits/rejected": -2.6521661281585693, "logps/chosen": -130.17149353027344, "logps/rejected": -212.41921997070312, "loss": 0.3818, "rewards/accuracies": 0.625, "rewards/chosen": 0.001506030559539795, "rewards/margins": 1.44063138961792, "rewards/rejected": -1.4391252994537354, "step": 3267 }, { "epoch": 0.38, "learning_rate": 1.8983963478871587e-07, "logits/chosen": -3.3422892093658447, "logits/rejected": -2.9832239151000977, "logps/chosen": -389.35394287109375, "logps/rejected": -366.3259582519531, "loss": 0.2645, "rewards/accuracies": 0.875, "rewards/chosen": 0.19954347610473633, "rewards/margins": 1.9503593444824219, "rewards/rejected": -1.7508158683776855, "step": 3268 }, { "epoch": 0.38, "learning_rate": 1.8980451831909165e-07, "logits/chosen": -3.8020968437194824, "logits/rejected": -3.816239356994629, "logps/chosen": -239.0646209716797, "logps/rejected": -233.32949829101562, "loss": 0.3473, "rewards/accuracies": 1.0, "rewards/chosen": 0.3341495990753174, "rewards/margins": 1.4127821922302246, "rewards/rejected": -1.0786325931549072, "step": 3269 }, { "epoch": 0.38, "learning_rate": 1.897694018494674e-07, "logits/chosen": -2.644350528717041, "logits/rejected": -2.427365779876709, "logps/chosen": -273.2784729003906, "logps/rejected": -319.6897888183594, "loss": 0.5051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2436281144618988, "rewards/margins": 1.0179781913757324, "rewards/rejected": -1.2616063356399536, "step": 3270 }, { "epoch": 0.38, "learning_rate": 1.8973428537984314e-07, "logits/chosen": -2.8985328674316406, "logits/rejected": -2.7413225173950195, "logps/chosen": -337.0899963378906, "logps/rejected": -204.57928466796875, "loss": 0.574, "rewards/accuracies": 0.75, "rewards/chosen": -0.7751039862632751, "rewards/margins": 0.9183584451675415, "rewards/rejected": -1.6934623718261719, "step": 3271 }, { "epoch": 0.38, "learning_rate": 1.896991689102189e-07, "logits/chosen": -3.141148090362549, "logits/rejected": -3.1746315956115723, "logps/chosen": -262.5524597167969, "logps/rejected": -167.91156005859375, "loss": 0.4441, "rewards/accuracies": 0.75, "rewards/chosen": 0.8348259925842285, "rewards/margins": 2.5769550800323486, "rewards/rejected": -1.7421290874481201, "step": 3272 }, { "epoch": 0.38, "learning_rate": 1.8966405244059464e-07, "logits/chosen": -3.045548915863037, "logits/rejected": -3.4743990898132324, "logps/chosen": -204.1428680419922, "logps/rejected": -284.0826721191406, "loss": 0.8844, "rewards/accuracies": 0.5, "rewards/chosen": 0.05128922313451767, "rewards/margins": 0.23392778635025024, "rewards/rejected": -0.18263855576515198, "step": 3273 }, { "epoch": 0.38, "learning_rate": 1.8962893597097037e-07, "logits/chosen": -2.5927233695983887, "logits/rejected": -2.7178752422332764, "logps/chosen": -216.53982543945312, "logps/rejected": -270.5825500488281, "loss": 0.2049, "rewards/accuracies": 1.0, "rewards/chosen": 0.11347056925296783, "rewards/margins": 2.8802762031555176, "rewards/rejected": -2.76680588722229, "step": 3274 }, { "epoch": 0.38, "learning_rate": 1.8959381950134612e-07, "logits/chosen": -3.5513343811035156, "logits/rejected": -3.615835189819336, "logps/chosen": -191.34625244140625, "logps/rejected": -233.92471313476562, "loss": 0.7685, "rewards/accuracies": 0.5, "rewards/chosen": -0.7832320928573608, "rewards/margins": 1.2523956298828125, "rewards/rejected": -2.035627603530884, "step": 3275 }, { "epoch": 0.38, "learning_rate": 1.8955870303172185e-07, "logits/chosen": -2.82460880279541, "logits/rejected": -2.5946860313415527, "logps/chosen": -385.426025390625, "logps/rejected": -193.66998291015625, "loss": 0.4562, "rewards/accuracies": 0.75, "rewards/chosen": 0.023731417953968048, "rewards/margins": 1.501528024673462, "rewards/rejected": -1.4777965545654297, "step": 3276 }, { "epoch": 0.38, "learning_rate": 1.895235865620976e-07, "logits/chosen": -2.7012245655059814, "logits/rejected": -2.9006242752075195, "logps/chosen": -313.27178955078125, "logps/rejected": -230.33370971679688, "loss": 0.2867, "rewards/accuracies": 0.875, "rewards/chosen": 0.48774468898773193, "rewards/margins": 1.5161164999008179, "rewards/rejected": -1.028371810913086, "step": 3277 }, { "epoch": 0.38, "learning_rate": 1.8948847009247339e-07, "logits/chosen": -3.764421224594116, "logits/rejected": -4.064428329467773, "logps/chosen": -162.78953552246094, "logps/rejected": -212.28662109375, "loss": 0.3659, "rewards/accuracies": 0.875, "rewards/chosen": 0.533346951007843, "rewards/margins": 1.8389813899993896, "rewards/rejected": -1.3056344985961914, "step": 3278 }, { "epoch": 0.38, "learning_rate": 1.894533536228491e-07, "logits/chosen": -3.533937931060791, "logits/rejected": -3.724313735961914, "logps/chosen": -202.3952178955078, "logps/rejected": -204.21546936035156, "loss": 0.3533, "rewards/accuracies": 1.0, "rewards/chosen": 0.0708017647266388, "rewards/margins": 1.7498159408569336, "rewards/rejected": -1.6790142059326172, "step": 3279 }, { "epoch": 0.38, "learning_rate": 1.8941823715322487e-07, "logits/chosen": -3.6773629188537598, "logits/rejected": -3.9739575386047363, "logps/chosen": -174.67254638671875, "logps/rejected": -215.70721435546875, "loss": 0.547, "rewards/accuracies": 0.625, "rewards/chosen": -0.20336365699768066, "rewards/margins": 1.1152557134628296, "rewards/rejected": -1.3186193704605103, "step": 3280 }, { "epoch": 0.38, "learning_rate": 1.893831206836006e-07, "logits/chosen": -2.8084702491760254, "logits/rejected": -3.1349902153015137, "logps/chosen": -168.08966064453125, "logps/rejected": -272.7445983886719, "loss": 0.2446, "rewards/accuracies": 1.0, "rewards/chosen": -0.193680539727211, "rewards/margins": 1.890684723854065, "rewards/rejected": -2.0843653678894043, "step": 3281 }, { "epoch": 0.38, "learning_rate": 1.8934800421397635e-07, "logits/chosen": -3.5066637992858887, "logits/rejected": -3.101552724838257, "logps/chosen": -569.9415893554688, "logps/rejected": -285.5455017089844, "loss": 1.45, "rewards/accuracies": 0.75, "rewards/chosen": -1.009317398071289, "rewards/margins": 0.7138168811798096, "rewards/rejected": -1.7231343984603882, "step": 3282 }, { "epoch": 0.38, "learning_rate": 1.893128877443521e-07, "logits/chosen": -3.3261404037475586, "logits/rejected": -3.154379367828369, "logps/chosen": -425.2516784667969, "logps/rejected": -282.50958251953125, "loss": 0.334, "rewards/accuracies": 0.75, "rewards/chosen": 0.5665931105613708, "rewards/margins": 1.9900599718093872, "rewards/rejected": -1.4234668016433716, "step": 3283 }, { "epoch": 0.38, "learning_rate": 1.8927777127472783e-07, "logits/chosen": -3.350087881088257, "logits/rejected": -3.349928855895996, "logps/chosen": -190.12283325195312, "logps/rejected": -154.02249145507812, "loss": 0.8403, "rewards/accuracies": 0.375, "rewards/chosen": -0.8056511282920837, "rewards/margins": 0.15024809539318085, "rewards/rejected": -0.9558992385864258, "step": 3284 }, { "epoch": 0.38, "learning_rate": 1.8924265480510358e-07, "logits/chosen": -3.7102129459381104, "logits/rejected": -3.347630739212036, "logps/chosen": -202.17469787597656, "logps/rejected": -193.7458038330078, "loss": 0.4459, "rewards/accuracies": 0.75, "rewards/chosen": -0.030800748616456985, "rewards/margins": 0.9983671307563782, "rewards/rejected": -1.029167890548706, "step": 3285 }, { "epoch": 0.38, "learning_rate": 1.8920753833547934e-07, "logits/chosen": -3.7895452976226807, "logits/rejected": -3.3033580780029297, "logps/chosen": -367.5447082519531, "logps/rejected": -319.93499755859375, "loss": 0.2828, "rewards/accuracies": 0.875, "rewards/chosen": 0.19426314532756805, "rewards/margins": 1.879339337348938, "rewards/rejected": -1.685076117515564, "step": 3286 }, { "epoch": 0.38, "learning_rate": 1.8917242186585506e-07, "logits/chosen": -2.6906075477600098, "logits/rejected": -2.460141658782959, "logps/chosen": -288.1797180175781, "logps/rejected": -218.65945434570312, "loss": 0.2706, "rewards/accuracies": 1.0, "rewards/chosen": 0.3034425973892212, "rewards/margins": 2.683164358139038, "rewards/rejected": -2.3797216415405273, "step": 3287 }, { "epoch": 0.38, "learning_rate": 1.8913730539623082e-07, "logits/chosen": -3.032623767852783, "logits/rejected": -3.402095079421997, "logps/chosen": -375.28570556640625, "logps/rejected": -320.8821716308594, "loss": 0.5842, "rewards/accuracies": 0.625, "rewards/chosen": 0.09593716263771057, "rewards/margins": 0.6738909482955933, "rewards/rejected": -0.5779538154602051, "step": 3288 }, { "epoch": 0.38, "learning_rate": 1.8910218892660655e-07, "logits/chosen": -2.9447529315948486, "logits/rejected": -2.9131171703338623, "logps/chosen": -219.88800048828125, "logps/rejected": -269.6672058105469, "loss": 0.6481, "rewards/accuracies": 0.625, "rewards/chosen": -0.861519455909729, "rewards/margins": 0.6639772057533264, "rewards/rejected": -1.5254967212677002, "step": 3289 }, { "epoch": 0.38, "learning_rate": 1.890670724569823e-07, "logits/chosen": -3.296943187713623, "logits/rejected": -3.4018473625183105, "logps/chosen": -467.2939147949219, "logps/rejected": -227.11643981933594, "loss": 0.2872, "rewards/accuracies": 1.0, "rewards/chosen": 0.03399544954299927, "rewards/margins": 1.8533363342285156, "rewards/rejected": -1.8193409442901611, "step": 3290 }, { "epoch": 0.38, "learning_rate": 1.8903195598735808e-07, "logits/chosen": -2.2892041206359863, "logits/rejected": -2.437941074371338, "logps/chosen": -474.3783264160156, "logps/rejected": -494.00457763671875, "loss": 0.5919, "rewards/accuracies": 0.625, "rewards/chosen": -0.2495562732219696, "rewards/margins": 1.4580421447753906, "rewards/rejected": -1.7075984477996826, "step": 3291 }, { "epoch": 0.38, "learning_rate": 1.889968395177338e-07, "logits/chosen": -2.958592176437378, "logits/rejected": -3.1123037338256836, "logps/chosen": -283.4925231933594, "logps/rejected": -296.1649169921875, "loss": 0.4877, "rewards/accuracies": 0.625, "rewards/chosen": -0.5664748549461365, "rewards/margins": 1.1059414148330688, "rewards/rejected": -1.6724162101745605, "step": 3292 }, { "epoch": 0.38, "learning_rate": 1.8896172304810956e-07, "logits/chosen": -3.092756748199463, "logits/rejected": -2.7519452571868896, "logps/chosen": -398.3504943847656, "logps/rejected": -432.1631164550781, "loss": 0.4874, "rewards/accuracies": 0.625, "rewards/chosen": 0.11511875689029694, "rewards/margins": 1.6519618034362793, "rewards/rejected": -1.5368430614471436, "step": 3293 }, { "epoch": 0.38, "learning_rate": 1.8892660657848532e-07, "logits/chosen": -2.7783358097076416, "logits/rejected": -3.1314947605133057, "logps/chosen": -170.7704315185547, "logps/rejected": -155.86134338378906, "loss": 0.357, "rewards/accuracies": 0.75, "rewards/chosen": 0.6245697736740112, "rewards/margins": 1.5900131464004517, "rewards/rejected": -0.9654433131217957, "step": 3294 }, { "epoch": 0.38, "learning_rate": 1.8889149010886104e-07, "logits/chosen": -3.5736119747161865, "logits/rejected": -3.4967470169067383, "logps/chosen": -253.67776489257812, "logps/rejected": -166.7464141845703, "loss": 0.4302, "rewards/accuracies": 0.75, "rewards/chosen": -0.3428748846054077, "rewards/margins": 0.9962511658668518, "rewards/rejected": -1.3391261100769043, "step": 3295 }, { "epoch": 0.38, "learning_rate": 1.888563736392368e-07, "logits/chosen": -3.1321921348571777, "logits/rejected": -3.1431524753570557, "logps/chosen": -302.3958435058594, "logps/rejected": -138.30589294433594, "loss": 0.4415, "rewards/accuracies": 0.75, "rewards/chosen": -0.44807320833206177, "rewards/margins": 0.8661787509918213, "rewards/rejected": -1.3142518997192383, "step": 3296 }, { "epoch": 0.38, "learning_rate": 1.8882125716961252e-07, "logits/chosen": -3.398437976837158, "logits/rejected": -3.717824935913086, "logps/chosen": -202.67575073242188, "logps/rejected": -278.493408203125, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": 0.3177153170108795, "rewards/margins": 2.3680624961853027, "rewards/rejected": -2.050347328186035, "step": 3297 }, { "epoch": 0.38, "learning_rate": 1.8878614069998828e-07, "logits/chosen": -3.6961684226989746, "logits/rejected": -3.661113977432251, "logps/chosen": -197.9889373779297, "logps/rejected": -243.475341796875, "loss": 0.3915, "rewards/accuracies": 0.75, "rewards/chosen": -0.024217352271080017, "rewards/margins": 1.6054434776306152, "rewards/rejected": -1.6296608448028564, "step": 3298 }, { "epoch": 0.38, "learning_rate": 1.8875102423036403e-07, "logits/chosen": -3.4814982414245605, "logits/rejected": -3.3964343070983887, "logps/chosen": -311.9060974121094, "logps/rejected": -251.25796508789062, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.00868266075849533, "rewards/margins": 0.6992079615592957, "rewards/rejected": -0.7078907489776611, "step": 3299 }, { "epoch": 0.38, "learning_rate": 1.8871590776073976e-07, "logits/chosen": -3.337003707885742, "logits/rejected": -3.4357104301452637, "logps/chosen": -126.36668395996094, "logps/rejected": -159.96090698242188, "loss": 0.1661, "rewards/accuracies": 0.875, "rewards/chosen": 0.4924886226654053, "rewards/margins": 2.4605462551116943, "rewards/rejected": -1.96805739402771, "step": 3300 }, { "epoch": 0.38, "learning_rate": 1.8868079129111554e-07, "logits/chosen": -3.319650173187256, "logits/rejected": -3.018343925476074, "logps/chosen": -329.4004821777344, "logps/rejected": -144.05502319335938, "loss": 0.518, "rewards/accuracies": 0.625, "rewards/chosen": -0.7958881258964539, "rewards/margins": 0.984311580657959, "rewards/rejected": -1.7801995277404785, "step": 3301 }, { "epoch": 0.38, "learning_rate": 1.886456748214913e-07, "logits/chosen": -3.367759943008423, "logits/rejected": -3.259202480316162, "logps/chosen": -341.15313720703125, "logps/rejected": -376.1211242675781, "loss": 0.3514, "rewards/accuracies": 0.875, "rewards/chosen": -0.07507112622261047, "rewards/margins": 2.022493839263916, "rewards/rejected": -2.097564935684204, "step": 3302 }, { "epoch": 0.38, "learning_rate": 1.8861055835186702e-07, "logits/chosen": -3.0694632530212402, "logits/rejected": -3.2107715606689453, "logps/chosen": -258.094482421875, "logps/rejected": -270.24658203125, "loss": 0.3799, "rewards/accuracies": 0.875, "rewards/chosen": -0.45277899503707886, "rewards/margins": 1.5561782121658325, "rewards/rejected": -2.0089573860168457, "step": 3303 }, { "epoch": 0.38, "learning_rate": 1.8857544188224277e-07, "logits/chosen": -3.002181053161621, "logits/rejected": -3.022519826889038, "logps/chosen": -273.4798278808594, "logps/rejected": -330.8305969238281, "loss": 0.9528, "rewards/accuracies": 0.5, "rewards/chosen": -0.613883912563324, "rewards/margins": 0.42271921038627625, "rewards/rejected": -1.0366029739379883, "step": 3304 }, { "epoch": 0.38, "learning_rate": 1.885403254126185e-07, "logits/chosen": -3.3664557933807373, "logits/rejected": -3.200284004211426, "logps/chosen": -222.54446411132812, "logps/rejected": -227.20130920410156, "loss": 0.5178, "rewards/accuracies": 0.875, "rewards/chosen": -0.03998154401779175, "rewards/margins": 1.129910945892334, "rewards/rejected": -1.1698925495147705, "step": 3305 }, { "epoch": 0.38, "learning_rate": 1.8850520894299426e-07, "logits/chosen": -3.821786642074585, "logits/rejected": -3.862496852874756, "logps/chosen": -254.02020263671875, "logps/rejected": -368.3092956542969, "loss": 0.2136, "rewards/accuracies": 1.0, "rewards/chosen": -0.23757895827293396, "rewards/margins": 3.0252878665924072, "rewards/rejected": -3.262866973876953, "step": 3306 }, { "epoch": 0.38, "learning_rate": 1.8847009247337e-07, "logits/chosen": -3.1608047485351562, "logits/rejected": -3.0649266242980957, "logps/chosen": -275.3810729980469, "logps/rejected": -256.4399108886719, "loss": 0.2291, "rewards/accuracies": 1.0, "rewards/chosen": -0.03167836368083954, "rewards/margins": 2.303206205368042, "rewards/rejected": -2.3348846435546875, "step": 3307 }, { "epoch": 0.38, "learning_rate": 1.8843497600374574e-07, "logits/chosen": -3.1852426528930664, "logits/rejected": -3.001401424407959, "logps/chosen": -197.1796875, "logps/rejected": -186.481689453125, "loss": 0.2805, "rewards/accuracies": 0.875, "rewards/chosen": -0.089028000831604, "rewards/margins": 2.0856070518493652, "rewards/rejected": -2.174635171890259, "step": 3308 }, { "epoch": 0.38, "learning_rate": 1.883998595341215e-07, "logits/chosen": -2.9746756553649902, "logits/rejected": -3.350696086883545, "logps/chosen": -167.04098510742188, "logps/rejected": -193.2460479736328, "loss": 0.5921, "rewards/accuracies": 0.5, "rewards/chosen": 0.1360245645046234, "rewards/margins": 0.7690789103507996, "rewards/rejected": -0.6330543756484985, "step": 3309 }, { "epoch": 0.38, "learning_rate": 1.8836474306449724e-07, "logits/chosen": -3.5847833156585693, "logits/rejected": -3.4554836750030518, "logps/chosen": -204.88865661621094, "logps/rejected": -183.84750366210938, "loss": 0.2218, "rewards/accuracies": 1.0, "rewards/chosen": 0.02667609415948391, "rewards/margins": 2.6765217781066895, "rewards/rejected": -2.649845600128174, "step": 3310 }, { "epoch": 0.38, "learning_rate": 1.8832962659487297e-07, "logits/chosen": -3.1703643798828125, "logits/rejected": -3.0827040672302246, "logps/chosen": -235.3411102294922, "logps/rejected": -165.66087341308594, "loss": 0.311, "rewards/accuracies": 1.0, "rewards/chosen": 0.21020975708961487, "rewards/margins": 1.6115771532058716, "rewards/rejected": -1.401367425918579, "step": 3311 }, { "epoch": 0.38, "learning_rate": 1.8829451012524875e-07, "logits/chosen": -2.3361592292785645, "logits/rejected": -2.603630781173706, "logps/chosen": -327.1177673339844, "logps/rejected": -256.0703430175781, "loss": 0.3917, "rewards/accuracies": 0.875, "rewards/chosen": -0.21818825602531433, "rewards/margins": 1.1059364080429077, "rewards/rejected": -1.3241246938705444, "step": 3312 }, { "epoch": 0.38, "learning_rate": 1.8825939365562445e-07, "logits/chosen": -3.5835318565368652, "logits/rejected": -3.33602237701416, "logps/chosen": -198.59976196289062, "logps/rejected": -329.790771484375, "loss": 0.5615, "rewards/accuracies": 0.875, "rewards/chosen": 0.10201224684715271, "rewards/margins": 2.3762001991271973, "rewards/rejected": -2.2741878032684326, "step": 3313 }, { "epoch": 0.38, "learning_rate": 1.8822427718600023e-07, "logits/chosen": -3.6227874755859375, "logits/rejected": -3.7840752601623535, "logps/chosen": -245.32415771484375, "logps/rejected": -192.49459838867188, "loss": 0.3795, "rewards/accuracies": 0.75, "rewards/chosen": -0.2593570351600647, "rewards/margins": 2.066713333129883, "rewards/rejected": -2.3260703086853027, "step": 3314 }, { "epoch": 0.38, "learning_rate": 1.88189160716376e-07, "logits/chosen": -2.963606357574463, "logits/rejected": -3.1620724201202393, "logps/chosen": -315.806640625, "logps/rejected": -321.217041015625, "loss": 0.4938, "rewards/accuracies": 0.75, "rewards/chosen": 0.554469883441925, "rewards/margins": 1.31089448928833, "rewards/rejected": -0.7564246654510498, "step": 3315 }, { "epoch": 0.38, "learning_rate": 1.8815404424675171e-07, "logits/chosen": -3.6104540824890137, "logits/rejected": -3.508883476257324, "logps/chosen": -221.0469970703125, "logps/rejected": -322.13067626953125, "loss": 0.2489, "rewards/accuracies": 0.875, "rewards/chosen": -0.16562509536743164, "rewards/margins": 1.9476971626281738, "rewards/rejected": -2.1133222579956055, "step": 3316 }, { "epoch": 0.38, "learning_rate": 1.8811892777712747e-07, "logits/chosen": -2.955089569091797, "logits/rejected": -3.1377112865448, "logps/chosen": -357.8824157714844, "logps/rejected": -314.53961181640625, "loss": 0.3232, "rewards/accuracies": 0.75, "rewards/chosen": 0.1293627917766571, "rewards/margins": 2.441380262374878, "rewards/rejected": -2.3120179176330566, "step": 3317 }, { "epoch": 0.38, "learning_rate": 1.8808381130750322e-07, "logits/chosen": -1.7877302169799805, "logits/rejected": -1.9313466548919678, "logps/chosen": -372.9151306152344, "logps/rejected": -259.4946594238281, "loss": 0.5156, "rewards/accuracies": 0.625, "rewards/chosen": 0.021996498107910156, "rewards/margins": 1.2861825227737427, "rewards/rejected": -1.2641860246658325, "step": 3318 }, { "epoch": 0.38, "learning_rate": 1.8804869483787895e-07, "logits/chosen": -3.0780375003814697, "logits/rejected": -3.2801733016967773, "logps/chosen": -459.59814453125, "logps/rejected": -324.6257019042969, "loss": 0.4136, "rewards/accuracies": 0.875, "rewards/chosen": 0.36141911149024963, "rewards/margins": 2.255362033843994, "rewards/rejected": -1.8939428329467773, "step": 3319 }, { "epoch": 0.38, "learning_rate": 1.880135783682547e-07, "logits/chosen": -2.8265552520751953, "logits/rejected": -2.706533432006836, "logps/chosen": -271.15338134765625, "logps/rejected": -210.36805725097656, "loss": 0.5894, "rewards/accuracies": 0.75, "rewards/chosen": -0.4487724006175995, "rewards/margins": 0.7796653509140015, "rewards/rejected": -1.2284376621246338, "step": 3320 }, { "epoch": 0.38, "learning_rate": 1.8797846189863043e-07, "logits/chosen": -2.788576602935791, "logits/rejected": -2.840190887451172, "logps/chosen": -252.01242065429688, "logps/rejected": -175.6400604248047, "loss": 0.2582, "rewards/accuracies": 0.875, "rewards/chosen": 0.45263272523880005, "rewards/margins": 2.2169642448425293, "rewards/rejected": -1.764331579208374, "step": 3321 }, { "epoch": 0.38, "learning_rate": 1.8794334542900619e-07, "logits/chosen": -3.044832944869995, "logits/rejected": -3.179853916168213, "logps/chosen": -249.2722930908203, "logps/rejected": -153.52249145507812, "loss": 0.4038, "rewards/accuracies": 0.875, "rewards/chosen": 0.03893648087978363, "rewards/margins": 1.8686343431472778, "rewards/rejected": -1.829697847366333, "step": 3322 }, { "epoch": 0.38, "learning_rate": 1.8790822895938197e-07, "logits/chosen": -3.148273229598999, "logits/rejected": -3.119884490966797, "logps/chosen": -316.1927795410156, "logps/rejected": -268.95556640625, "loss": 0.3877, "rewards/accuracies": 0.75, "rewards/chosen": 0.07867990434169769, "rewards/margins": 1.0090584754943848, "rewards/rejected": -0.9303786158561707, "step": 3323 }, { "epoch": 0.38, "learning_rate": 1.8787311248975767e-07, "logits/chosen": -3.249379873275757, "logits/rejected": -3.315068244934082, "logps/chosen": -152.01266479492188, "logps/rejected": -138.59857177734375, "loss": 0.3809, "rewards/accuracies": 0.875, "rewards/chosen": 0.41191452741622925, "rewards/margins": 1.2860987186431885, "rewards/rejected": -0.8741841912269592, "step": 3324 }, { "epoch": 0.38, "learning_rate": 1.8783799602013345e-07, "logits/chosen": -3.19000244140625, "logits/rejected": -3.237889289855957, "logps/chosen": -261.4431457519531, "logps/rejected": -175.7769775390625, "loss": 0.3493, "rewards/accuracies": 0.875, "rewards/chosen": 0.40924137830734253, "rewards/margins": 1.1121751070022583, "rewards/rejected": -0.7029337882995605, "step": 3325 }, { "epoch": 0.38, "learning_rate": 1.878028795505092e-07, "logits/chosen": -3.0472631454467773, "logits/rejected": -3.134939432144165, "logps/chosen": -288.6307067871094, "logps/rejected": -355.745361328125, "loss": 0.8361, "rewards/accuracies": 0.625, "rewards/chosen": -0.3885285556316376, "rewards/margins": 0.8718860149383545, "rewards/rejected": -1.2604146003723145, "step": 3326 }, { "epoch": 0.38, "learning_rate": 1.8776776308088493e-07, "logits/chosen": -3.550727367401123, "logits/rejected": -3.308666944503784, "logps/chosen": -222.9954833984375, "logps/rejected": -203.93783569335938, "loss": 0.5892, "rewards/accuracies": 0.625, "rewards/chosen": -0.43419745564460754, "rewards/margins": 1.0974576473236084, "rewards/rejected": -1.5316550731658936, "step": 3327 }, { "epoch": 0.38, "learning_rate": 1.8773264661126068e-07, "logits/chosen": -2.3440239429473877, "logits/rejected": -2.456005573272705, "logps/chosen": -401.47705078125, "logps/rejected": -282.2621765136719, "loss": 0.3423, "rewards/accuracies": 0.875, "rewards/chosen": 0.2311014086008072, "rewards/margins": 2.3919286727905273, "rewards/rejected": -2.160827398300171, "step": 3328 }, { "epoch": 0.38, "learning_rate": 1.876975301416364e-07, "logits/chosen": -3.332796573638916, "logits/rejected": -2.918513536453247, "logps/chosen": -256.4441833496094, "logps/rejected": -156.6804962158203, "loss": 0.5172, "rewards/accuracies": 0.75, "rewards/chosen": -0.8432498574256897, "rewards/margins": 0.8773359060287476, "rewards/rejected": -1.720585823059082, "step": 3329 }, { "epoch": 0.38, "learning_rate": 1.8766241367201216e-07, "logits/chosen": -3.5055770874023438, "logits/rejected": -3.5941367149353027, "logps/chosen": -425.8380126953125, "logps/rejected": -350.90826416015625, "loss": 0.4307, "rewards/accuracies": 0.875, "rewards/chosen": -0.397757351398468, "rewards/margins": 2.2431368827819824, "rewards/rejected": -2.6408944129943848, "step": 3330 }, { "epoch": 0.38, "learning_rate": 1.8762729720238792e-07, "logits/chosen": -2.747596263885498, "logits/rejected": -2.6344313621520996, "logps/chosen": -438.404296875, "logps/rejected": -256.55108642578125, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": 0.46374815702438354, "rewards/margins": 1.9566004276275635, "rewards/rejected": -1.4928523302078247, "step": 3331 }, { "epoch": 0.38, "learning_rate": 1.8759218073276364e-07, "logits/chosen": -3.1625051498413086, "logits/rejected": -3.0398197174072266, "logps/chosen": -247.79641723632812, "logps/rejected": -221.09149169921875, "loss": 0.2824, "rewards/accuracies": 0.875, "rewards/chosen": -0.10505138337612152, "rewards/margins": 1.5449755191802979, "rewards/rejected": -1.650026798248291, "step": 3332 }, { "epoch": 0.38, "learning_rate": 1.875570642631394e-07, "logits/chosen": -3.540853977203369, "logits/rejected": -3.352689504623413, "logps/chosen": -303.53240966796875, "logps/rejected": -280.15130615234375, "loss": 0.6413, "rewards/accuracies": 0.75, "rewards/chosen": 0.4182283580303192, "rewards/margins": 1.3621182441711426, "rewards/rejected": -0.943889856338501, "step": 3333 }, { "epoch": 0.38, "learning_rate": 1.8752194779351513e-07, "logits/chosen": -3.3047995567321777, "logits/rejected": -3.3412954807281494, "logps/chosen": -158.80426025390625, "logps/rejected": -185.94676208496094, "loss": 0.2036, "rewards/accuracies": 1.0, "rewards/chosen": 0.1461183726787567, "rewards/margins": 2.3584775924682617, "rewards/rejected": -2.2123589515686035, "step": 3334 }, { "epoch": 0.38, "learning_rate": 1.874868313238909e-07, "logits/chosen": -3.198099136352539, "logits/rejected": -2.8970463275909424, "logps/chosen": -209.2010498046875, "logps/rejected": -178.73695373535156, "loss": 0.5164, "rewards/accuracies": 0.75, "rewards/chosen": 0.10556148737668991, "rewards/margins": 1.478636384010315, "rewards/rejected": -1.3730748891830444, "step": 3335 }, { "epoch": 0.38, "learning_rate": 1.8745171485426666e-07, "logits/chosen": -2.6294567584991455, "logits/rejected": -2.634214401245117, "logps/chosen": -401.44219970703125, "logps/rejected": -327.6624755859375, "loss": 0.3827, "rewards/accuracies": 0.75, "rewards/chosen": 0.06505288183689117, "rewards/margins": 1.1446897983551025, "rewards/rejected": -1.079636812210083, "step": 3336 }, { "epoch": 0.38, "learning_rate": 1.874165983846424e-07, "logits/chosen": -2.9999094009399414, "logits/rejected": -3.298171043395996, "logps/chosen": -246.87286376953125, "logps/rejected": -262.9950256347656, "loss": 0.6166, "rewards/accuracies": 0.625, "rewards/chosen": -0.4159756004810333, "rewards/margins": 1.1566616296768188, "rewards/rejected": -1.5726372003555298, "step": 3337 }, { "epoch": 0.38, "learning_rate": 1.8738148191501814e-07, "logits/chosen": -3.141669750213623, "logits/rejected": -2.815613269805908, "logps/chosen": -198.24844360351562, "logps/rejected": -353.8513488769531, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.18997150659561157, "rewards/margins": 1.7183077335357666, "rewards/rejected": -1.5283362865447998, "step": 3338 }, { "epoch": 0.38, "learning_rate": 1.873463654453939e-07, "logits/chosen": -2.634459972381592, "logits/rejected": -2.607105016708374, "logps/chosen": -300.10382080078125, "logps/rejected": -222.62738037109375, "loss": 0.4495, "rewards/accuracies": 0.875, "rewards/chosen": -0.03068874031305313, "rewards/margins": 0.785773754119873, "rewards/rejected": -0.816462516784668, "step": 3339 }, { "epoch": 0.39, "learning_rate": 1.8731124897576962e-07, "logits/chosen": -2.624997615814209, "logits/rejected": -2.5127382278442383, "logps/chosen": -249.82827758789062, "logps/rejected": -276.1859130859375, "loss": 0.6184, "rewards/accuracies": 0.625, "rewards/chosen": 0.08863720297813416, "rewards/margins": 0.6619340181350708, "rewards/rejected": -0.573296844959259, "step": 3340 }, { "epoch": 0.39, "learning_rate": 1.8727613250614538e-07, "logits/chosen": -2.7503695487976074, "logits/rejected": -2.4913058280944824, "logps/chosen": -373.00689697265625, "logps/rejected": -292.4007873535156, "loss": 0.4127, "rewards/accuracies": 0.625, "rewards/chosen": 0.27091163396835327, "rewards/margins": 1.3581360578536987, "rewards/rejected": -1.0872244834899902, "step": 3341 }, { "epoch": 0.39, "learning_rate": 1.872410160365211e-07, "logits/chosen": -2.5320582389831543, "logits/rejected": -2.530043601989746, "logps/chosen": -215.9588623046875, "logps/rejected": -255.28515625, "loss": 0.5106, "rewards/accuracies": 0.75, "rewards/chosen": -0.018528848886489868, "rewards/margins": 2.9273273944854736, "rewards/rejected": -2.9458560943603516, "step": 3342 }, { "epoch": 0.39, "learning_rate": 1.8720589956689686e-07, "logits/chosen": -3.311363697052002, "logits/rejected": -3.3521389961242676, "logps/chosen": -302.8426208496094, "logps/rejected": -294.8957214355469, "loss": 0.3976, "rewards/accuracies": 0.75, "rewards/chosen": 0.04431906342506409, "rewards/margins": 1.8754771947860718, "rewards/rejected": -1.831157922744751, "step": 3343 }, { "epoch": 0.39, "learning_rate": 1.871707830972726e-07, "logits/chosen": -3.40177845954895, "logits/rejected": -3.2989864349365234, "logps/chosen": -334.1148681640625, "logps/rejected": -276.3096923828125, "loss": 0.3485, "rewards/accuracies": 0.75, "rewards/chosen": 0.017334014177322388, "rewards/margins": 2.0643246173858643, "rewards/rejected": -2.0469906330108643, "step": 3344 }, { "epoch": 0.39, "learning_rate": 1.8713566662764834e-07, "logits/chosen": -2.217054843902588, "logits/rejected": -2.5100722312927246, "logps/chosen": -247.73190307617188, "logps/rejected": -167.6239471435547, "loss": 0.6107, "rewards/accuracies": 0.875, "rewards/chosen": 0.353685200214386, "rewards/margins": 1.0655039548873901, "rewards/rejected": -0.7118187546730042, "step": 3345 }, { "epoch": 0.39, "learning_rate": 1.8710055015802412e-07, "logits/chosen": -2.178075075149536, "logits/rejected": -2.1576452255249023, "logps/chosen": -302.20660400390625, "logps/rejected": -269.6806945800781, "loss": 0.5126, "rewards/accuracies": 0.625, "rewards/chosen": -0.11869420111179352, "rewards/margins": 1.1605371236801147, "rewards/rejected": -1.279231309890747, "step": 3346 }, { "epoch": 0.39, "learning_rate": 1.8706543368839987e-07, "logits/chosen": -2.257629632949829, "logits/rejected": -2.4049737453460693, "logps/chosen": -288.4828186035156, "logps/rejected": -236.7098388671875, "loss": 0.3712, "rewards/accuracies": 0.875, "rewards/chosen": -0.2058131992816925, "rewards/margins": 1.362931728363037, "rewards/rejected": -1.5687451362609863, "step": 3347 }, { "epoch": 0.39, "learning_rate": 1.870303172187756e-07, "logits/chosen": -3.8431026935577393, "logits/rejected": -3.504307746887207, "logps/chosen": -195.62246704101562, "logps/rejected": -165.6244659423828, "loss": 0.8627, "rewards/accuracies": 0.5, "rewards/chosen": -0.9455024600028992, "rewards/margins": 0.38038721680641174, "rewards/rejected": -1.3258895874023438, "step": 3348 }, { "epoch": 0.39, "learning_rate": 1.8699520074915135e-07, "logits/chosen": -3.0325000286102295, "logits/rejected": -3.3411097526550293, "logps/chosen": -343.81414794921875, "logps/rejected": -287.38677978515625, "loss": 0.3539, "rewards/accuracies": 0.875, "rewards/chosen": -0.7902094125747681, "rewards/margins": 1.3343619108200073, "rewards/rejected": -2.1245713233947754, "step": 3349 }, { "epoch": 0.39, "learning_rate": 1.8696008427952708e-07, "logits/chosen": -3.4896583557128906, "logits/rejected": -3.298124313354492, "logps/chosen": -271.431884765625, "logps/rejected": -188.68923950195312, "loss": 0.8103, "rewards/accuracies": 0.375, "rewards/chosen": -0.7972546815872192, "rewards/margins": 0.2501929998397827, "rewards/rejected": -1.047447681427002, "step": 3350 }, { "epoch": 0.39, "learning_rate": 1.8692496780990284e-07, "logits/chosen": -3.444488048553467, "logits/rejected": -3.2263903617858887, "logps/chosen": -149.27359008789062, "logps/rejected": -232.07749938964844, "loss": 0.4041, "rewards/accuracies": 0.625, "rewards/chosen": 0.3413795232772827, "rewards/margins": 1.3979203701019287, "rewards/rejected": -1.056540846824646, "step": 3351 }, { "epoch": 0.39, "learning_rate": 1.868898513402786e-07, "logits/chosen": -3.362281560897827, "logits/rejected": -3.167978525161743, "logps/chosen": -286.9901428222656, "logps/rejected": -197.0650177001953, "loss": 0.4225, "rewards/accuracies": 0.875, "rewards/chosen": -0.2425033450126648, "rewards/margins": 1.3410884141921997, "rewards/rejected": -1.5835916996002197, "step": 3352 }, { "epoch": 0.39, "learning_rate": 1.8685473487065432e-07, "logits/chosen": -3.1142430305480957, "logits/rejected": -3.0576536655426025, "logps/chosen": -329.6435241699219, "logps/rejected": -333.5663146972656, "loss": 0.6718, "rewards/accuracies": 0.625, "rewards/chosen": -0.780022919178009, "rewards/margins": 0.30121511220932007, "rewards/rejected": -1.081238031387329, "step": 3353 }, { "epoch": 0.39, "learning_rate": 1.8681961840103007e-07, "logits/chosen": -3.5235843658447266, "logits/rejected": -3.370985269546509, "logps/chosen": -186.47865295410156, "logps/rejected": -166.6328887939453, "loss": 0.4251, "rewards/accuracies": 0.75, "rewards/chosen": 0.1238468587398529, "rewards/margins": 1.4607198238372803, "rewards/rejected": -1.336872935295105, "step": 3354 }, { "epoch": 0.39, "learning_rate": 1.8678450193140582e-07, "logits/chosen": -2.620180606842041, "logits/rejected": -2.7486674785614014, "logps/chosen": -138.8900909423828, "logps/rejected": -282.7743835449219, "loss": 0.4456, "rewards/accuracies": 0.875, "rewards/chosen": -0.12089106440544128, "rewards/margins": 1.9135546684265137, "rewards/rejected": -2.0344457626342773, "step": 3355 }, { "epoch": 0.39, "learning_rate": 1.8674938546178155e-07, "logits/chosen": -3.702939033508301, "logits/rejected": -3.7201905250549316, "logps/chosen": -227.0693359375, "logps/rejected": -319.1444091796875, "loss": 0.5938, "rewards/accuracies": 0.75, "rewards/chosen": -0.8549561500549316, "rewards/margins": 1.06113862991333, "rewards/rejected": -1.9160947799682617, "step": 3356 }, { "epoch": 0.39, "learning_rate": 1.8671426899215733e-07, "logits/chosen": -3.2310216426849365, "logits/rejected": -3.7196450233459473, "logps/chosen": -177.603759765625, "logps/rejected": -244.32870483398438, "loss": 0.2985, "rewards/accuracies": 0.875, "rewards/chosen": 0.414431095123291, "rewards/margins": 2.636556625366211, "rewards/rejected": -2.22212553024292, "step": 3357 }, { "epoch": 0.39, "learning_rate": 1.8667915252253303e-07, "logits/chosen": -2.5573692321777344, "logits/rejected": -2.7154250144958496, "logps/chosen": -417.4433898925781, "logps/rejected": -195.0287628173828, "loss": 0.4056, "rewards/accuracies": 0.875, "rewards/chosen": 0.22184929251670837, "rewards/margins": 1.4430923461914062, "rewards/rejected": -1.221243143081665, "step": 3358 }, { "epoch": 0.39, "learning_rate": 1.866440360529088e-07, "logits/chosen": -3.3796443939208984, "logits/rejected": -3.373166799545288, "logps/chosen": -286.3492736816406, "logps/rejected": -225.70712280273438, "loss": 0.5381, "rewards/accuracies": 0.625, "rewards/chosen": -0.18584302067756653, "rewards/margins": 1.0121279954910278, "rewards/rejected": -1.1979711055755615, "step": 3359 }, { "epoch": 0.39, "learning_rate": 1.8660891958328457e-07, "logits/chosen": -2.8926033973693848, "logits/rejected": -2.9248368740081787, "logps/chosen": -439.7306823730469, "logps/rejected": -297.16912841796875, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 1.1083266735076904, "rewards/margins": 3.0434398651123047, "rewards/rejected": -1.9351134300231934, "step": 3360 }, { "epoch": 0.39, "learning_rate": 1.865738031136603e-07, "logits/chosen": -2.720067262649536, "logits/rejected": -2.682982921600342, "logps/chosen": -400.87786865234375, "logps/rejected": -284.9651794433594, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": -0.31100887060165405, "rewards/margins": 1.0271086692810059, "rewards/rejected": -1.3381175994873047, "step": 3361 }, { "epoch": 0.39, "learning_rate": 1.8653868664403605e-07, "logits/chosen": -2.1397862434387207, "logits/rejected": -2.1891415119171143, "logps/chosen": -208.695556640625, "logps/rejected": -238.9937286376953, "loss": 0.2815, "rewards/accuracies": 0.75, "rewards/chosen": -0.09859620034694672, "rewards/margins": 2.2002899646759033, "rewards/rejected": -2.2988858222961426, "step": 3362 }, { "epoch": 0.39, "learning_rate": 1.865035701744118e-07, "logits/chosen": -2.6101458072662354, "logits/rejected": -2.943380355834961, "logps/chosen": -317.00274658203125, "logps/rejected": -381.96014404296875, "loss": 0.2555, "rewards/accuracies": 1.0, "rewards/chosen": -0.07327142357826233, "rewards/margins": 2.0773184299468994, "rewards/rejected": -2.150589942932129, "step": 3363 }, { "epoch": 0.39, "learning_rate": 1.8646845370478753e-07, "logits/chosen": -3.013810157775879, "logits/rejected": -3.291996479034424, "logps/chosen": -269.6859130859375, "logps/rejected": -278.3575744628906, "loss": 0.2278, "rewards/accuracies": 1.0, "rewards/chosen": 0.23223423957824707, "rewards/margins": 2.323197841644287, "rewards/rejected": -2.090963363647461, "step": 3364 }, { "epoch": 0.39, "learning_rate": 1.8643333723516328e-07, "logits/chosen": -3.0004775524139404, "logits/rejected": -2.9746522903442383, "logps/chosen": -195.71728515625, "logps/rejected": -351.3665771484375, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.29655030369758606, "rewards/margins": 1.7599458694458008, "rewards/rejected": -1.4633957147598267, "step": 3365 }, { "epoch": 0.39, "learning_rate": 1.86398220765539e-07, "logits/chosen": -2.5990443229675293, "logits/rejected": -2.7844252586364746, "logps/chosen": -370.12939453125, "logps/rejected": -264.8372497558594, "loss": 0.3081, "rewards/accuracies": 0.875, "rewards/chosen": 0.2996978759765625, "rewards/margins": 1.8699290752410889, "rewards/rejected": -1.570231318473816, "step": 3366 }, { "epoch": 0.39, "learning_rate": 1.8636310429591476e-07, "logits/chosen": -2.7141056060791016, "logits/rejected": -2.564105987548828, "logps/chosen": -134.06741333007812, "logps/rejected": -263.7242431640625, "loss": 1.004, "rewards/accuracies": 0.75, "rewards/chosen": -0.055227622389793396, "rewards/margins": 0.3429550230503082, "rewards/rejected": -0.39818263053894043, "step": 3367 }, { "epoch": 0.39, "learning_rate": 1.8632798782629054e-07, "logits/chosen": -2.9558935165405273, "logits/rejected": -2.7172017097473145, "logps/chosen": -300.5688781738281, "logps/rejected": -295.043212890625, "loss": 0.2361, "rewards/accuracies": 0.875, "rewards/chosen": 0.4470914602279663, "rewards/margins": 3.5402493476867676, "rewards/rejected": -3.0931577682495117, "step": 3368 }, { "epoch": 0.39, "learning_rate": 1.8629287135666627e-07, "logits/chosen": -3.631865978240967, "logits/rejected": -3.37953519821167, "logps/chosen": -140.4248809814453, "logps/rejected": -96.6922607421875, "loss": 0.6306, "rewards/accuracies": 0.625, "rewards/chosen": -0.6538541316986084, "rewards/margins": 0.5482792258262634, "rewards/rejected": -1.2021334171295166, "step": 3369 }, { "epoch": 0.39, "learning_rate": 1.8625775488704203e-07, "logits/chosen": -2.8829598426818848, "logits/rejected": -3.028331756591797, "logps/chosen": -219.79953002929688, "logps/rejected": -194.84095764160156, "loss": 0.2894, "rewards/accuracies": 0.875, "rewards/chosen": 0.2292080819606781, "rewards/margins": 2.234205722808838, "rewards/rejected": -2.004997730255127, "step": 3370 }, { "epoch": 0.39, "learning_rate": 1.8622263841741778e-07, "logits/chosen": -2.3341972827911377, "logits/rejected": -2.4414947032928467, "logps/chosen": -204.51611328125, "logps/rejected": -239.00143432617188, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": 0.35514748096466064, "rewards/margins": 1.5296046733856201, "rewards/rejected": -1.1744571924209595, "step": 3371 }, { "epoch": 0.39, "learning_rate": 1.861875219477935e-07, "logits/chosen": -2.627225160598755, "logits/rejected": -2.6667089462280273, "logps/chosen": -554.605712890625, "logps/rejected": -419.80242919921875, "loss": 0.2777, "rewards/accuracies": 0.875, "rewards/chosen": 0.1647966504096985, "rewards/margins": 2.3659350872039795, "rewards/rejected": -2.201138496398926, "step": 3372 }, { "epoch": 0.39, "learning_rate": 1.8615240547816926e-07, "logits/chosen": -2.8291306495666504, "logits/rejected": -2.8740427494049072, "logps/chosen": -400.13616943359375, "logps/rejected": -280.4493408203125, "loss": 0.6007, "rewards/accuracies": 0.625, "rewards/chosen": -0.12412129342556, "rewards/margins": 1.0625388622283936, "rewards/rejected": -1.1866602897644043, "step": 3373 }, { "epoch": 0.39, "learning_rate": 1.86117289008545e-07, "logits/chosen": -3.0686099529266357, "logits/rejected": -3.1982178688049316, "logps/chosen": -247.5400390625, "logps/rejected": -229.33641052246094, "loss": 0.3827, "rewards/accuracies": 0.875, "rewards/chosen": 0.24513068795204163, "rewards/margins": 1.2210686206817627, "rewards/rejected": -0.9759379029273987, "step": 3374 }, { "epoch": 0.39, "learning_rate": 1.8608217253892074e-07, "logits/chosen": -2.6899802684783936, "logits/rejected": -2.7356677055358887, "logps/chosen": -147.55157470703125, "logps/rejected": -205.00828552246094, "loss": 0.2274, "rewards/accuracies": 1.0, "rewards/chosen": -0.14768216013908386, "rewards/margins": 1.979715347290039, "rewards/rejected": -2.1273975372314453, "step": 3375 }, { "epoch": 0.39, "learning_rate": 1.860470560692965e-07, "logits/chosen": -3.8911726474761963, "logits/rejected": -4.114433765411377, "logps/chosen": -177.40090942382812, "logps/rejected": -209.9423828125, "loss": 0.4896, "rewards/accuracies": 0.75, "rewards/chosen": -0.5649049878120422, "rewards/margins": 2.5308401584625244, "rewards/rejected": -3.095745086669922, "step": 3376 }, { "epoch": 0.39, "learning_rate": 1.8601193959967222e-07, "logits/chosen": -2.7695517539978027, "logits/rejected": -2.738558053970337, "logps/chosen": -286.91607666015625, "logps/rejected": -199.46896362304688, "loss": 0.3297, "rewards/accuracies": 1.0, "rewards/chosen": -0.03962108492851257, "rewards/margins": 1.0827757120132446, "rewards/rejected": -1.1223968267440796, "step": 3377 }, { "epoch": 0.39, "learning_rate": 1.8597682313004798e-07, "logits/chosen": -2.427811622619629, "logits/rejected": -2.4038844108581543, "logps/chosen": -369.1319580078125, "logps/rejected": -308.73876953125, "loss": 0.3693, "rewards/accuracies": 0.875, "rewards/chosen": 0.03487871587276459, "rewards/margins": 1.2280731201171875, "rewards/rejected": -1.1931943893432617, "step": 3378 }, { "epoch": 0.39, "learning_rate": 1.859417066604237e-07, "logits/chosen": -2.4350507259368896, "logits/rejected": -2.372783899307251, "logps/chosen": -367.0918884277344, "logps/rejected": -340.97662353515625, "loss": 0.5742, "rewards/accuracies": 0.75, "rewards/chosen": -0.1634221076965332, "rewards/margins": 1.012915015220642, "rewards/rejected": -1.1763370037078857, "step": 3379 }, { "epoch": 0.39, "learning_rate": 1.8590659019079949e-07, "logits/chosen": -2.682093620300293, "logits/rejected": -2.9124908447265625, "logps/chosen": -173.0694122314453, "logps/rejected": -275.68914794921875, "loss": 0.3007, "rewards/accuracies": 0.875, "rewards/chosen": 0.2074582278728485, "rewards/margins": 3.350914239883423, "rewards/rejected": -3.143455982208252, "step": 3380 }, { "epoch": 0.39, "learning_rate": 1.8587147372117524e-07, "logits/chosen": -3.0590505599975586, "logits/rejected": -3.3552021980285645, "logps/chosen": -234.07838439941406, "logps/rejected": -289.84600830078125, "loss": 0.3092, "rewards/accuracies": 0.875, "rewards/chosen": 0.1857576221227646, "rewards/margins": 2.1563804149627686, "rewards/rejected": -1.9706226587295532, "step": 3381 }, { "epoch": 0.39, "learning_rate": 1.8583635725155097e-07, "logits/chosen": -3.3385438919067383, "logits/rejected": -3.1695470809936523, "logps/chosen": -155.19070434570312, "logps/rejected": -276.621337890625, "loss": 0.2268, "rewards/accuracies": 0.75, "rewards/chosen": 0.09304594993591309, "rewards/margins": 3.2190189361572266, "rewards/rejected": -3.1259727478027344, "step": 3382 }, { "epoch": 0.39, "learning_rate": 1.8580124078192672e-07, "logits/chosen": -3.629153251647949, "logits/rejected": -3.4300785064697266, "logps/chosen": -499.6724853515625, "logps/rejected": -278.95361328125, "loss": 0.3443, "rewards/accuracies": 0.875, "rewards/chosen": -0.4883619546890259, "rewards/margins": 1.6910970211029053, "rewards/rejected": -2.1794590950012207, "step": 3383 }, { "epoch": 0.39, "learning_rate": 1.8576612431230247e-07, "logits/chosen": -2.231999397277832, "logits/rejected": -2.3044333457946777, "logps/chosen": -304.4563903808594, "logps/rejected": -261.8544921875, "loss": 0.4567, "rewards/accuracies": 0.875, "rewards/chosen": -0.24544525146484375, "rewards/margins": 1.9181358814239502, "rewards/rejected": -2.163581371307373, "step": 3384 }, { "epoch": 0.39, "learning_rate": 1.857310078426782e-07, "logits/chosen": -2.8477489948272705, "logits/rejected": -2.8152599334716797, "logps/chosen": -200.8197784423828, "logps/rejected": -267.375, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.37946486473083496, "rewards/margins": 0.18810680508613586, "rewards/rejected": -0.567571759223938, "step": 3385 }, { "epoch": 0.39, "learning_rate": 1.8569589137305396e-07, "logits/chosen": -3.839646339416504, "logits/rejected": -3.7602250576019287, "logps/chosen": -135.00042724609375, "logps/rejected": -175.22726440429688, "loss": 0.436, "rewards/accuracies": 0.75, "rewards/chosen": -0.16968408226966858, "rewards/margins": 1.6109760999679565, "rewards/rejected": -1.7806601524353027, "step": 3386 }, { "epoch": 0.39, "learning_rate": 1.8566077490342968e-07, "logits/chosen": -2.488852024078369, "logits/rejected": -2.615156888961792, "logps/chosen": -227.81906127929688, "logps/rejected": -220.7418212890625, "loss": 0.6238, "rewards/accuracies": 0.5, "rewards/chosen": 0.012204080820083618, "rewards/margins": 0.808881938457489, "rewards/rejected": -0.796677827835083, "step": 3387 }, { "epoch": 0.39, "learning_rate": 1.8562565843380544e-07, "logits/chosen": -3.6475515365600586, "logits/rejected": -3.4200499057769775, "logps/chosen": -272.94586181640625, "logps/rejected": -214.51524353027344, "loss": 0.7092, "rewards/accuracies": 0.5, "rewards/chosen": -0.6195650696754456, "rewards/margins": 0.14976060390472412, "rewards/rejected": -0.7693256735801697, "step": 3388 }, { "epoch": 0.39, "learning_rate": 1.855905419641812e-07, "logits/chosen": -2.964409828186035, "logits/rejected": -3.0110695362091064, "logps/chosen": -193.103759765625, "logps/rejected": -265.68994140625, "loss": 0.3673, "rewards/accuracies": 0.875, "rewards/chosen": 0.30437523126602173, "rewards/margins": 3.1547951698303223, "rewards/rejected": -2.8504199981689453, "step": 3389 }, { "epoch": 0.39, "learning_rate": 1.8555542549455692e-07, "logits/chosen": -3.060431957244873, "logits/rejected": -2.884021282196045, "logps/chosen": -243.8343505859375, "logps/rejected": -312.174560546875, "loss": 0.2885, "rewards/accuracies": 0.875, "rewards/chosen": -0.6447136402130127, "rewards/margins": 2.2804665565490723, "rewards/rejected": -2.925179958343506, "step": 3390 }, { "epoch": 0.39, "learning_rate": 1.855203090249327e-07, "logits/chosen": -3.038705825805664, "logits/rejected": -2.8781962394714355, "logps/chosen": -329.5721435546875, "logps/rejected": -197.72413635253906, "loss": 0.4405, "rewards/accuracies": 0.75, "rewards/chosen": 0.11811991035938263, "rewards/margins": 1.1736043691635132, "rewards/rejected": -1.055484414100647, "step": 3391 }, { "epoch": 0.39, "learning_rate": 1.8548519255530845e-07, "logits/chosen": -3.295572280883789, "logits/rejected": -3.2315125465393066, "logps/chosen": -301.0611572265625, "logps/rejected": -287.15594482421875, "loss": 0.3519, "rewards/accuracies": 1.0, "rewards/chosen": -0.5227702856063843, "rewards/margins": 1.3830246925354004, "rewards/rejected": -1.9057950973510742, "step": 3392 }, { "epoch": 0.39, "learning_rate": 1.8545007608568418e-07, "logits/chosen": -2.9663784503936768, "logits/rejected": -3.289010524749756, "logps/chosen": -98.59745788574219, "logps/rejected": -177.1878662109375, "loss": 0.1957, "rewards/accuracies": 1.0, "rewards/chosen": 0.1801941692829132, "rewards/margins": 2.462151527404785, "rewards/rejected": -2.2819571495056152, "step": 3393 }, { "epoch": 0.39, "learning_rate": 1.8541495961605993e-07, "logits/chosen": -2.7384355068206787, "logits/rejected": -2.58261775970459, "logps/chosen": -149.14356994628906, "logps/rejected": -212.92825317382812, "loss": 0.3339, "rewards/accuracies": 0.75, "rewards/chosen": -0.10349922627210617, "rewards/margins": 2.724579095840454, "rewards/rejected": -2.828078508377075, "step": 3394 }, { "epoch": 0.39, "learning_rate": 1.8537984314643566e-07, "logits/chosen": -3.217405319213867, "logits/rejected": -3.0452003479003906, "logps/chosen": -286.14044189453125, "logps/rejected": -265.0211181640625, "loss": 0.7018, "rewards/accuracies": 0.625, "rewards/chosen": -0.41152554750442505, "rewards/margins": 0.3327970504760742, "rewards/rejected": -0.744322657585144, "step": 3395 }, { "epoch": 0.39, "learning_rate": 1.8534472667681141e-07, "logits/chosen": -3.1047143936157227, "logits/rejected": -3.174858331680298, "logps/chosen": -212.82627868652344, "logps/rejected": -123.75166320800781, "loss": 0.4077, "rewards/accuracies": 0.875, "rewards/chosen": 0.22732719779014587, "rewards/margins": 1.6396843194961548, "rewards/rejected": -1.4123570919036865, "step": 3396 }, { "epoch": 0.39, "learning_rate": 1.8530961020718717e-07, "logits/chosen": -3.093470335006714, "logits/rejected": -3.0289719104766846, "logps/chosen": -279.82989501953125, "logps/rejected": -364.48529052734375, "loss": 0.4333, "rewards/accuracies": 0.75, "rewards/chosen": 0.05530785024166107, "rewards/margins": 2.0141072273254395, "rewards/rejected": -1.9587993621826172, "step": 3397 }, { "epoch": 0.39, "learning_rate": 1.852744937375629e-07, "logits/chosen": -3.001474380493164, "logits/rejected": -3.060197353363037, "logps/chosen": -322.37078857421875, "logps/rejected": -351.6236267089844, "loss": 0.285, "rewards/accuracies": 0.875, "rewards/chosen": -0.39066553115844727, "rewards/margins": 2.1257545948028564, "rewards/rejected": -2.5164201259613037, "step": 3398 }, { "epoch": 0.39, "learning_rate": 1.8523937726793865e-07, "logits/chosen": -4.070487976074219, "logits/rejected": -3.6121044158935547, "logps/chosen": -418.24908447265625, "logps/rejected": -237.89312744140625, "loss": 0.1945, "rewards/accuracies": 1.0, "rewards/chosen": -0.18658122420310974, "rewards/margins": 2.391826629638672, "rewards/rejected": -2.5784077644348145, "step": 3399 }, { "epoch": 0.39, "learning_rate": 1.852042607983144e-07, "logits/chosen": -2.8050506114959717, "logits/rejected": -3.3182506561279297, "logps/chosen": -347.0992431640625, "logps/rejected": -157.43162536621094, "loss": 0.3915, "rewards/accuracies": 0.875, "rewards/chosen": 0.12073326110839844, "rewards/margins": 1.359837532043457, "rewards/rejected": -1.2391042709350586, "step": 3400 }, { "epoch": 0.39, "learning_rate": 1.8516914432869013e-07, "logits/chosen": -3.655104637145996, "logits/rejected": -3.44968318939209, "logps/chosen": -286.94415283203125, "logps/rejected": -227.41424560546875, "loss": 0.3225, "rewards/accuracies": 0.875, "rewards/chosen": -0.32168933749198914, "rewards/margins": 1.2877914905548096, "rewards/rejected": -1.6094807386398315, "step": 3401 }, { "epoch": 0.39, "learning_rate": 1.851340278590659e-07, "logits/chosen": -2.6493191719055176, "logits/rejected": -2.5444202423095703, "logps/chosen": -277.2802429199219, "logps/rejected": -232.4818115234375, "loss": 0.1712, "rewards/accuracies": 1.0, "rewards/chosen": 0.13050682842731476, "rewards/margins": 2.367535352706909, "rewards/rejected": -2.2370285987854004, "step": 3402 }, { "epoch": 0.39, "learning_rate": 1.8509891138944164e-07, "logits/chosen": -3.004007339477539, "logits/rejected": -3.000755786895752, "logps/chosen": -174.97518920898438, "logps/rejected": -272.045166015625, "loss": 0.6057, "rewards/accuracies": 0.625, "rewards/chosen": -0.5065417289733887, "rewards/margins": 0.756436824798584, "rewards/rejected": -1.2629785537719727, "step": 3403 }, { "epoch": 0.39, "learning_rate": 1.850637949198174e-07, "logits/chosen": -3.5222623348236084, "logits/rejected": -3.2390434741973877, "logps/chosen": -234.52227783203125, "logps/rejected": -183.23561096191406, "loss": 0.4366, "rewards/accuracies": 0.75, "rewards/chosen": 0.023716449737548828, "rewards/margins": 1.4353289604187012, "rewards/rejected": -1.4116125106811523, "step": 3404 }, { "epoch": 0.39, "learning_rate": 1.8502867845019315e-07, "logits/chosen": -2.6648526191711426, "logits/rejected": -3.119711399078369, "logps/chosen": -295.4226989746094, "logps/rejected": -249.0082244873047, "loss": 0.4064, "rewards/accuracies": 0.875, "rewards/chosen": -0.07682031393051147, "rewards/margins": 1.5970805883407593, "rewards/rejected": -1.6739009618759155, "step": 3405 }, { "epoch": 0.39, "learning_rate": 1.8499356198056887e-07, "logits/chosen": -2.5974478721618652, "logits/rejected": -2.548638105392456, "logps/chosen": -344.97052001953125, "logps/rejected": -248.51914978027344, "loss": 0.2161, "rewards/accuracies": 0.875, "rewards/chosen": 0.3117366433143616, "rewards/margins": 2.454721450805664, "rewards/rejected": -2.1429848670959473, "step": 3406 }, { "epoch": 0.39, "learning_rate": 1.8495844551094463e-07, "logits/chosen": -3.1767420768737793, "logits/rejected": -3.5475704669952393, "logps/chosen": -214.65811157226562, "logps/rejected": -239.67112731933594, "loss": 0.2957, "rewards/accuracies": 0.875, "rewards/chosen": 0.23205077648162842, "rewards/margins": 2.327207326889038, "rewards/rejected": -2.09515643119812, "step": 3407 }, { "epoch": 0.39, "learning_rate": 1.8492332904132038e-07, "logits/chosen": -4.057716369628906, "logits/rejected": -4.095120429992676, "logps/chosen": -294.60040283203125, "logps/rejected": -363.2543029785156, "loss": 0.2342, "rewards/accuracies": 0.875, "rewards/chosen": -0.04292702674865723, "rewards/margins": 2.290642499923706, "rewards/rejected": -2.3335695266723633, "step": 3408 }, { "epoch": 0.39, "learning_rate": 1.848882125716961e-07, "logits/chosen": -2.7262916564941406, "logits/rejected": -2.8955628871917725, "logps/chosen": -149.23635864257812, "logps/rejected": -152.49020385742188, "loss": 0.4906, "rewards/accuracies": 0.75, "rewards/chosen": -0.015595519915223122, "rewards/margins": 0.8295466303825378, "rewards/rejected": -0.845142126083374, "step": 3409 }, { "epoch": 0.39, "learning_rate": 1.8485309610207186e-07, "logits/chosen": -2.4477405548095703, "logits/rejected": -2.5379374027252197, "logps/chosen": -320.546142578125, "logps/rejected": -291.97137451171875, "loss": 0.4202, "rewards/accuracies": 0.875, "rewards/chosen": -0.06467342376708984, "rewards/margins": 1.3058156967163086, "rewards/rejected": -1.3704891204833984, "step": 3410 }, { "epoch": 0.39, "learning_rate": 1.848179796324476e-07, "logits/chosen": -4.032327651977539, "logits/rejected": -3.812528371810913, "logps/chosen": -284.2132873535156, "logps/rejected": -228.83348083496094, "loss": 0.4528, "rewards/accuracies": 0.75, "rewards/chosen": -0.5192509889602661, "rewards/margins": 1.3175041675567627, "rewards/rejected": -1.8367552757263184, "step": 3411 }, { "epoch": 0.39, "learning_rate": 1.8478286316282334e-07, "logits/chosen": -2.609930992126465, "logits/rejected": -2.7362887859344482, "logps/chosen": -284.83740234375, "logps/rejected": -198.05532836914062, "loss": 0.3028, "rewards/accuracies": 0.875, "rewards/chosen": 0.6130197644233704, "rewards/margins": 2.0226969718933105, "rewards/rejected": -1.409677267074585, "step": 3412 }, { "epoch": 0.39, "learning_rate": 1.8474774669319912e-07, "logits/chosen": -3.4208579063415527, "logits/rejected": -3.596730947494507, "logps/chosen": -137.12066650390625, "logps/rejected": -167.757568359375, "loss": 0.349, "rewards/accuracies": 0.75, "rewards/chosen": -0.17109420895576477, "rewards/margins": 1.3649487495422363, "rewards/rejected": -1.5360430479049683, "step": 3413 }, { "epoch": 0.39, "learning_rate": 1.8471263022357485e-07, "logits/chosen": -3.343433141708374, "logits/rejected": -3.1570382118225098, "logps/chosen": -240.90579223632812, "logps/rejected": -194.027099609375, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": 0.04245595633983612, "rewards/margins": 2.2222485542297363, "rewards/rejected": -2.179792642593384, "step": 3414 }, { "epoch": 0.39, "learning_rate": 1.846775137539506e-07, "logits/chosen": -3.061934232711792, "logits/rejected": -2.7414865493774414, "logps/chosen": -209.19700622558594, "logps/rejected": -230.2843017578125, "loss": 0.5469, "rewards/accuracies": 0.625, "rewards/chosen": -0.1764589250087738, "rewards/margins": 0.608339786529541, "rewards/rejected": -0.7847987413406372, "step": 3415 }, { "epoch": 0.39, "learning_rate": 1.8464239728432636e-07, "logits/chosen": -3.14156174659729, "logits/rejected": -3.2964727878570557, "logps/chosen": -310.32318115234375, "logps/rejected": -256.63250732421875, "loss": 0.4196, "rewards/accuracies": 0.75, "rewards/chosen": 0.026013828814029694, "rewards/margins": 1.2330732345581055, "rewards/rejected": -1.2070592641830444, "step": 3416 }, { "epoch": 0.39, "learning_rate": 1.8460728081470209e-07, "logits/chosen": -2.322971820831299, "logits/rejected": -2.3209328651428223, "logps/chosen": -200.73098754882812, "logps/rejected": -250.62603759765625, "loss": 0.5926, "rewards/accuracies": 0.75, "rewards/chosen": -0.013542119413614273, "rewards/margins": 0.9380231499671936, "rewards/rejected": -0.9515652656555176, "step": 3417 }, { "epoch": 0.39, "learning_rate": 1.8457216434507784e-07, "logits/chosen": -3.3814640045166016, "logits/rejected": -3.225640058517456, "logps/chosen": -222.65309143066406, "logps/rejected": -244.70159912109375, "loss": 0.7562, "rewards/accuracies": 0.75, "rewards/chosen": -1.0886484384536743, "rewards/margins": 1.0527050495147705, "rewards/rejected": -2.1413536071777344, "step": 3418 }, { "epoch": 0.39, "learning_rate": 1.8453704787545357e-07, "logits/chosen": -3.8868417739868164, "logits/rejected": -3.726973533630371, "logps/chosen": -220.2620391845703, "logps/rejected": -229.06588745117188, "loss": 0.3496, "rewards/accuracies": 0.75, "rewards/chosen": -0.0012073889374732971, "rewards/margins": 2.6934709548950195, "rewards/rejected": -2.69467830657959, "step": 3419 }, { "epoch": 0.39, "learning_rate": 1.8450193140582932e-07, "logits/chosen": -2.560168743133545, "logits/rejected": -2.5692739486694336, "logps/chosen": -242.5186767578125, "logps/rejected": -231.54470825195312, "loss": 0.8903, "rewards/accuracies": 0.625, "rewards/chosen": -0.9504167437553406, "rewards/margins": 0.329061895608902, "rewards/rejected": -1.279478669166565, "step": 3420 }, { "epoch": 0.39, "learning_rate": 1.8446681493620508e-07, "logits/chosen": -2.688538074493408, "logits/rejected": -2.659205913543701, "logps/chosen": -184.4125213623047, "logps/rejected": -174.36456298828125, "loss": 0.9505, "rewards/accuracies": 0.5, "rewards/chosen": -1.1642804145812988, "rewards/margins": 0.6069768667221069, "rewards/rejected": -1.7712574005126953, "step": 3421 }, { "epoch": 0.39, "learning_rate": 1.844316984665808e-07, "logits/chosen": -2.7949483394622803, "logits/rejected": -2.58508038520813, "logps/chosen": -494.8498840332031, "logps/rejected": -289.1013488769531, "loss": 0.3411, "rewards/accuracies": 0.875, "rewards/chosen": -0.1843445897102356, "rewards/margins": 1.3441722393035889, "rewards/rejected": -1.5285168886184692, "step": 3422 }, { "epoch": 0.39, "learning_rate": 1.8439658199695656e-07, "logits/chosen": -2.9284255504608154, "logits/rejected": -3.0344367027282715, "logps/chosen": -141.4424285888672, "logps/rejected": -253.0063018798828, "loss": 0.3252, "rewards/accuracies": 0.875, "rewards/chosen": -0.1213291585445404, "rewards/margins": 2.977273941040039, "rewards/rejected": -3.0986032485961914, "step": 3423 }, { "epoch": 0.39, "learning_rate": 1.8436146552733234e-07, "logits/chosen": -2.9407036304473877, "logits/rejected": -2.6752398014068604, "logps/chosen": -319.13568115234375, "logps/rejected": -310.0118103027344, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": 0.38937199115753174, "rewards/margins": 1.0542962551116943, "rewards/rejected": -0.6649242043495178, "step": 3424 }, { "epoch": 0.39, "learning_rate": 1.8432634905770806e-07, "logits/chosen": -2.160059928894043, "logits/rejected": -2.2293734550476074, "logps/chosen": -371.50390625, "logps/rejected": -349.00189208984375, "loss": 0.2931, "rewards/accuracies": 0.875, "rewards/chosen": -0.10690009593963623, "rewards/margins": 3.0078330039978027, "rewards/rejected": -3.1147329807281494, "step": 3425 }, { "epoch": 0.39, "learning_rate": 1.8429123258808382e-07, "logits/chosen": -2.670339822769165, "logits/rejected": -2.7568917274475098, "logps/chosen": -311.570068359375, "logps/rejected": -395.3167724609375, "loss": 0.5987, "rewards/accuracies": 0.625, "rewards/chosen": 0.18331420421600342, "rewards/margins": 0.5684347748756409, "rewards/rejected": -0.38512054085731506, "step": 3426 }, { "epoch": 0.4, "learning_rate": 1.8425611611845955e-07, "logits/chosen": -3.1106386184692383, "logits/rejected": -2.9441208839416504, "logps/chosen": -161.59664916992188, "logps/rejected": -213.43841552734375, "loss": 0.367, "rewards/accuracies": 1.0, "rewards/chosen": 0.25946155190467834, "rewards/margins": 1.148641586303711, "rewards/rejected": -0.889180064201355, "step": 3427 }, { "epoch": 0.4, "learning_rate": 1.842209996488353e-07, "logits/chosen": -3.5887045860290527, "logits/rejected": -3.5745785236358643, "logps/chosen": -255.00872802734375, "logps/rejected": -195.31399536132812, "loss": 0.6525, "rewards/accuracies": 0.5, "rewards/chosen": -0.4729490578174591, "rewards/margins": 0.9715657234191895, "rewards/rejected": -1.4445146322250366, "step": 3428 }, { "epoch": 0.4, "learning_rate": 1.8418588317921105e-07, "logits/chosen": -3.4735629558563232, "logits/rejected": -3.0894460678100586, "logps/chosen": -148.03395080566406, "logps/rejected": -200.5955810546875, "loss": 0.3102, "rewards/accuracies": 1.0, "rewards/chosen": -0.2623097002506256, "rewards/margins": 1.661289095878601, "rewards/rejected": -1.9235990047454834, "step": 3429 }, { "epoch": 0.4, "learning_rate": 1.8415076670958678e-07, "logits/chosen": -3.4379820823669434, "logits/rejected": -3.3982369899749756, "logps/chosen": -157.95657348632812, "logps/rejected": -231.3470001220703, "loss": 0.5098, "rewards/accuracies": 0.75, "rewards/chosen": -0.15168511867523193, "rewards/margins": 1.4541528224945068, "rewards/rejected": -1.6058378219604492, "step": 3430 }, { "epoch": 0.4, "learning_rate": 1.8411565023996253e-07, "logits/chosen": -2.6997365951538086, "logits/rejected": -2.529655933380127, "logps/chosen": -304.3262939453125, "logps/rejected": -292.0592346191406, "loss": 0.3637, "rewards/accuracies": 0.875, "rewards/chosen": -0.06559017300605774, "rewards/margins": 2.0350987911224365, "rewards/rejected": -2.100688934326172, "step": 3431 }, { "epoch": 0.4, "learning_rate": 1.8408053377033826e-07, "logits/chosen": -3.7115635871887207, "logits/rejected": -3.5179336071014404, "logps/chosen": -359.3333740234375, "logps/rejected": -260.69757080078125, "loss": 0.676, "rewards/accuracies": 0.75, "rewards/chosen": -0.6465620994567871, "rewards/margins": 1.2208415269851685, "rewards/rejected": -1.8674036264419556, "step": 3432 }, { "epoch": 0.4, "learning_rate": 1.8404541730071402e-07, "logits/chosen": -3.126112222671509, "logits/rejected": -3.272031545639038, "logps/chosen": -341.91534423828125, "logps/rejected": -235.974365234375, "loss": 0.2768, "rewards/accuracies": 0.875, "rewards/chosen": -0.1963905245065689, "rewards/margins": 2.88618540763855, "rewards/rejected": -3.082575798034668, "step": 3433 }, { "epoch": 0.4, "learning_rate": 1.8401030083108977e-07, "logits/chosen": -3.420992374420166, "logits/rejected": -3.3470547199249268, "logps/chosen": -401.2160949707031, "logps/rejected": -374.7069396972656, "loss": 0.3688, "rewards/accuracies": 0.875, "rewards/chosen": 0.24546414613723755, "rewards/margins": 1.626028060913086, "rewards/rejected": -1.3805640935897827, "step": 3434 }, { "epoch": 0.4, "learning_rate": 1.839751843614655e-07, "logits/chosen": -3.233541250228882, "logits/rejected": -3.424907922744751, "logps/chosen": -333.7332458496094, "logps/rejected": -289.154541015625, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": 0.4791027009487152, "rewards/margins": 2.737766742706299, "rewards/rejected": -2.258664131164551, "step": 3435 }, { "epoch": 0.4, "learning_rate": 1.8394006789184128e-07, "logits/chosen": -3.577627182006836, "logits/rejected": -3.5800490379333496, "logps/chosen": -253.7808837890625, "logps/rejected": -295.1513671875, "loss": 0.5732, "rewards/accuracies": 0.75, "rewards/chosen": -0.25326278805732727, "rewards/margins": 0.9296380281448364, "rewards/rejected": -1.1829009056091309, "step": 3436 }, { "epoch": 0.4, "learning_rate": 1.8390495142221703e-07, "logits/chosen": -2.829408645629883, "logits/rejected": -2.527937412261963, "logps/chosen": -308.918701171875, "logps/rejected": -309.6043701171875, "loss": 0.3252, "rewards/accuracies": 1.0, "rewards/chosen": -0.18247976899147034, "rewards/margins": 1.5011465549468994, "rewards/rejected": -1.6836262941360474, "step": 3437 }, { "epoch": 0.4, "learning_rate": 1.8386983495259276e-07, "logits/chosen": -2.8233695030212402, "logits/rejected": -2.999967098236084, "logps/chosen": -182.94113159179688, "logps/rejected": -326.7904357910156, "loss": 0.6618, "rewards/accuracies": 0.625, "rewards/chosen": -0.5578591227531433, "rewards/margins": 0.7877384424209595, "rewards/rejected": -1.3455976247787476, "step": 3438 }, { "epoch": 0.4, "learning_rate": 1.838347184829685e-07, "logits/chosen": -3.5705630779266357, "logits/rejected": -3.539708137512207, "logps/chosen": -222.5360107421875, "logps/rejected": -236.65380859375, "loss": 0.2035, "rewards/accuracies": 1.0, "rewards/chosen": 0.33820006251335144, "rewards/margins": 2.008821964263916, "rewards/rejected": -1.6706221103668213, "step": 3439 }, { "epoch": 0.4, "learning_rate": 1.8379960201334424e-07, "logits/chosen": -3.048891544342041, "logits/rejected": -2.9751343727111816, "logps/chosen": -97.4214096069336, "logps/rejected": -145.11106872558594, "loss": 0.4746, "rewards/accuracies": 0.875, "rewards/chosen": -0.4986240565776825, "rewards/margins": 0.7183965444564819, "rewards/rejected": -1.2170205116271973, "step": 3440 }, { "epoch": 0.4, "learning_rate": 1.8376448554372e-07, "logits/chosen": -3.4824090003967285, "logits/rejected": -3.376521110534668, "logps/chosen": -293.71234130859375, "logps/rejected": -243.0302276611328, "loss": 0.3933, "rewards/accuracies": 0.625, "rewards/chosen": -0.12637832760810852, "rewards/margins": 1.6994874477386475, "rewards/rejected": -1.8258657455444336, "step": 3441 }, { "epoch": 0.4, "learning_rate": 1.8372936907409575e-07, "logits/chosen": -2.820589303970337, "logits/rejected": -2.6469178199768066, "logps/chosen": -107.77979278564453, "logps/rejected": -210.06214904785156, "loss": 0.7545, "rewards/accuracies": 0.625, "rewards/chosen": -0.5823734402656555, "rewards/margins": 0.4806463122367859, "rewards/rejected": -1.0630197525024414, "step": 3442 }, { "epoch": 0.4, "learning_rate": 1.8369425260447148e-07, "logits/chosen": -3.081399917602539, "logits/rejected": -3.029306650161743, "logps/chosen": -284.517333984375, "logps/rejected": -267.10015869140625, "loss": 0.3037, "rewards/accuracies": 1.0, "rewards/chosen": -0.04415444657206535, "rewards/margins": 2.6443567276000977, "rewards/rejected": -2.688511371612549, "step": 3443 }, { "epoch": 0.4, "learning_rate": 1.8365913613484723e-07, "logits/chosen": -3.756296157836914, "logits/rejected": -3.645860433578491, "logps/chosen": -220.17391967773438, "logps/rejected": -229.02725219726562, "loss": 0.3976, "rewards/accuracies": 0.875, "rewards/chosen": 0.42754799127578735, "rewards/margins": 1.1632983684539795, "rewards/rejected": -0.7357503771781921, "step": 3444 }, { "epoch": 0.4, "learning_rate": 1.8362401966522298e-07, "logits/chosen": -2.9803335666656494, "logits/rejected": -3.0743279457092285, "logps/chosen": -193.96510314941406, "logps/rejected": -273.0840759277344, "loss": 0.3767, "rewards/accuracies": 0.75, "rewards/chosen": 0.1567777693271637, "rewards/margins": 1.538785457611084, "rewards/rejected": -1.3820075988769531, "step": 3445 }, { "epoch": 0.4, "learning_rate": 1.835889031955987e-07, "logits/chosen": -2.8084921836853027, "logits/rejected": -3.286257743835449, "logps/chosen": -179.17327880859375, "logps/rejected": -257.85699462890625, "loss": 0.5309, "rewards/accuracies": 0.875, "rewards/chosen": -0.0890406146645546, "rewards/margins": 1.8660002946853638, "rewards/rejected": -1.9550409317016602, "step": 3446 }, { "epoch": 0.4, "learning_rate": 1.835537867259745e-07, "logits/chosen": -3.215855598449707, "logits/rejected": -2.7233681678771973, "logps/chosen": -433.71429443359375, "logps/rejected": -362.7952880859375, "loss": 0.1659, "rewards/accuracies": 1.0, "rewards/chosen": 0.5932531356811523, "rewards/margins": 2.6540896892547607, "rewards/rejected": -2.0608367919921875, "step": 3447 }, { "epoch": 0.4, "learning_rate": 1.8351867025635022e-07, "logits/chosen": -3.5963690280914307, "logits/rejected": -3.6705715656280518, "logps/chosen": -168.9079132080078, "logps/rejected": -136.09437561035156, "loss": 0.762, "rewards/accuracies": 0.625, "rewards/chosen": -0.5231258273124695, "rewards/margins": 0.8437231779098511, "rewards/rejected": -1.3668489456176758, "step": 3448 }, { "epoch": 0.4, "learning_rate": 1.8348355378672597e-07, "logits/chosen": -3.612478494644165, "logits/rejected": -3.0914835929870605, "logps/chosen": -476.5375061035156, "logps/rejected": -362.8228759765625, "loss": 0.2293, "rewards/accuracies": 1.0, "rewards/chosen": -0.2538681626319885, "rewards/margins": 2.132351875305176, "rewards/rejected": -2.3862199783325195, "step": 3449 }, { "epoch": 0.4, "learning_rate": 1.8344843731710173e-07, "logits/chosen": -3.289804458618164, "logits/rejected": -3.276055335998535, "logps/chosen": -142.82647705078125, "logps/rejected": -131.37258911132812, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": 0.19650857150554657, "rewards/margins": 1.370190143585205, "rewards/rejected": -1.1736814975738525, "step": 3450 }, { "epoch": 0.4, "learning_rate": 1.8341332084747745e-07, "logits/chosen": -3.6379916667938232, "logits/rejected": -3.678070306777954, "logps/chosen": -205.85464477539062, "logps/rejected": -208.7236328125, "loss": 0.3036, "rewards/accuracies": 0.875, "rewards/chosen": 0.12542441487312317, "rewards/margins": 1.9577301740646362, "rewards/rejected": -1.8323057889938354, "step": 3451 }, { "epoch": 0.4, "learning_rate": 1.833782043778532e-07, "logits/chosen": -2.6341700553894043, "logits/rejected": -2.497532367706299, "logps/chosen": -179.85658264160156, "logps/rejected": -341.32855224609375, "loss": 0.2213, "rewards/accuracies": 0.875, "rewards/chosen": 0.14233654737472534, "rewards/margins": 2.1341073513031006, "rewards/rejected": -1.9917707443237305, "step": 3452 }, { "epoch": 0.4, "learning_rate": 1.8334308790822896e-07, "logits/chosen": -3.6272635459899902, "logits/rejected": -3.214503049850464, "logps/chosen": -367.9060363769531, "logps/rejected": -237.32644653320312, "loss": 0.3114, "rewards/accuracies": 1.0, "rewards/chosen": -0.000913769006729126, "rewards/margins": 1.2735432386398315, "rewards/rejected": -1.2744569778442383, "step": 3453 }, { "epoch": 0.4, "learning_rate": 1.833079714386047e-07, "logits/chosen": -2.830575942993164, "logits/rejected": -2.723090887069702, "logps/chosen": -319.3262023925781, "logps/rejected": -242.7493896484375, "loss": 0.431, "rewards/accuracies": 0.625, "rewards/chosen": 0.21272200345993042, "rewards/margins": 1.3225533962249756, "rewards/rejected": -1.10983145236969, "step": 3454 }, { "epoch": 0.4, "learning_rate": 1.8327285496898044e-07, "logits/chosen": -4.272207260131836, "logits/rejected": -3.5548930168151855, "logps/chosen": -246.58929443359375, "logps/rejected": -136.9527587890625, "loss": 0.329, "rewards/accuracies": 0.875, "rewards/chosen": 0.2594621181488037, "rewards/margins": 1.41023588180542, "rewards/rejected": -1.1507737636566162, "step": 3455 }, { "epoch": 0.4, "learning_rate": 1.8323773849935617e-07, "logits/chosen": -3.0373036861419678, "logits/rejected": -3.3966822624206543, "logps/chosen": -116.74152374267578, "logps/rejected": -246.48388671875, "loss": 0.2496, "rewards/accuracies": 1.0, "rewards/chosen": 0.04930911958217621, "rewards/margins": 3.7068915367126465, "rewards/rejected": -3.6575822830200195, "step": 3456 }, { "epoch": 0.4, "learning_rate": 1.8320262202973192e-07, "logits/chosen": -2.892296075820923, "logits/rejected": -2.6429786682128906, "logps/chosen": -266.3183898925781, "logps/rejected": -236.55799865722656, "loss": 0.5891, "rewards/accuracies": 0.5, "rewards/chosen": -0.647473931312561, "rewards/margins": 0.7106512784957886, "rewards/rejected": -1.35812509059906, "step": 3457 }, { "epoch": 0.4, "learning_rate": 1.831675055601077e-07, "logits/chosen": -2.6327829360961914, "logits/rejected": -2.696831226348877, "logps/chosen": -168.52471923828125, "logps/rejected": -153.287109375, "loss": 0.2145, "rewards/accuracies": 0.875, "rewards/chosen": 0.6071206331253052, "rewards/margins": 2.1361184120178223, "rewards/rejected": -1.528997778892517, "step": 3458 }, { "epoch": 0.4, "learning_rate": 1.8313238909048343e-07, "logits/chosen": -2.7673087120056152, "logits/rejected": -2.692427396774292, "logps/chosen": -482.3166198730469, "logps/rejected": -351.4071960449219, "loss": 0.3819, "rewards/accuracies": 0.75, "rewards/chosen": 0.0673666000366211, "rewards/margins": 1.567945122718811, "rewards/rejected": -1.50057852268219, "step": 3459 }, { "epoch": 0.4, "learning_rate": 1.8309727262085918e-07, "logits/chosen": -3.606419563293457, "logits/rejected": -3.5960752964019775, "logps/chosen": -179.07508850097656, "logps/rejected": -248.56195068359375, "loss": 0.2682, "rewards/accuracies": 1.0, "rewards/chosen": -0.4236631989479065, "rewards/margins": 1.633461594581604, "rewards/rejected": -2.0571248531341553, "step": 3460 }, { "epoch": 0.4, "learning_rate": 1.8306215615123494e-07, "logits/chosen": -2.8672056198120117, "logits/rejected": -2.6310644149780273, "logps/chosen": -369.20330810546875, "logps/rejected": -239.59381103515625, "loss": 0.522, "rewards/accuracies": 0.75, "rewards/chosen": -0.2249467521905899, "rewards/margins": 0.8697970509529114, "rewards/rejected": -1.0947437286376953, "step": 3461 }, { "epoch": 0.4, "learning_rate": 1.8302703968161067e-07, "logits/chosen": -3.337010622024536, "logits/rejected": -3.4404327869415283, "logps/chosen": -386.5289306640625, "logps/rejected": -214.51809692382812, "loss": 0.4047, "rewards/accuracies": 0.875, "rewards/chosen": 0.12233559787273407, "rewards/margins": 1.9801111221313477, "rewards/rejected": -1.8577754497528076, "step": 3462 }, { "epoch": 0.4, "learning_rate": 1.8299192321198642e-07, "logits/chosen": -2.7257258892059326, "logits/rejected": -2.9512267112731934, "logps/chosen": -186.25701904296875, "logps/rejected": -314.9561767578125, "loss": 0.3029, "rewards/accuracies": 0.875, "rewards/chosen": 0.11656700074672699, "rewards/margins": 1.6411206722259521, "rewards/rejected": -1.5245535373687744, "step": 3463 }, { "epoch": 0.4, "learning_rate": 1.8295680674236215e-07, "logits/chosen": -2.292220115661621, "logits/rejected": -2.544412612915039, "logps/chosen": -260.32073974609375, "logps/rejected": -299.4255676269531, "loss": 0.3648, "rewards/accuracies": 0.875, "rewards/chosen": 0.41026222705841064, "rewards/margins": 1.7526750564575195, "rewards/rejected": -1.3424127101898193, "step": 3464 }, { "epoch": 0.4, "learning_rate": 1.829216902727379e-07, "logits/chosen": -3.08674955368042, "logits/rejected": -2.9405272006988525, "logps/chosen": -258.9977722167969, "logps/rejected": -232.78359985351562, "loss": 0.2712, "rewards/accuracies": 1.0, "rewards/chosen": 0.12193387001752853, "rewards/margins": 1.6900193691253662, "rewards/rejected": -1.5680854320526123, "step": 3465 }, { "epoch": 0.4, "learning_rate": 1.8288657380311365e-07, "logits/chosen": -2.3859076499938965, "logits/rejected": -2.606198310852051, "logps/chosen": -277.888427734375, "logps/rejected": -326.7913818359375, "loss": 0.6194, "rewards/accuracies": 0.625, "rewards/chosen": 0.2726787328720093, "rewards/margins": 1.7269638776779175, "rewards/rejected": -1.4542850255966187, "step": 3466 }, { "epoch": 0.4, "learning_rate": 1.8285145733348938e-07, "logits/chosen": -3.2121658325195312, "logits/rejected": -3.276298761367798, "logps/chosen": -351.5593566894531, "logps/rejected": -320.284912109375, "loss": 0.2258, "rewards/accuracies": 1.0, "rewards/chosen": 0.07429036498069763, "rewards/margins": 2.455681562423706, "rewards/rejected": -2.3813910484313965, "step": 3467 }, { "epoch": 0.4, "learning_rate": 1.8281634086386514e-07, "logits/chosen": -3.138625144958496, "logits/rejected": -3.0182228088378906, "logps/chosen": -228.5332489013672, "logps/rejected": -195.54931640625, "loss": 0.2832, "rewards/accuracies": 1.0, "rewards/chosen": -0.15359792113304138, "rewards/margins": 1.688624620437622, "rewards/rejected": -1.8422224521636963, "step": 3468 }, { "epoch": 0.4, "learning_rate": 1.8278122439424092e-07, "logits/chosen": -3.1214916706085205, "logits/rejected": -3.3553481101989746, "logps/chosen": -308.2829895019531, "logps/rejected": -228.91238403320312, "loss": 0.3329, "rewards/accuracies": 0.875, "rewards/chosen": -0.5386403799057007, "rewards/margins": 1.3041132688522339, "rewards/rejected": -1.8427536487579346, "step": 3469 }, { "epoch": 0.4, "learning_rate": 1.8274610792461664e-07, "logits/chosen": -3.2479937076568604, "logits/rejected": -3.0262136459350586, "logps/chosen": -261.2726745605469, "logps/rejected": -312.3704528808594, "loss": 0.8092, "rewards/accuracies": 0.75, "rewards/chosen": -0.1200781911611557, "rewards/margins": 0.7003231048583984, "rewards/rejected": -0.8204012513160706, "step": 3470 }, { "epoch": 0.4, "learning_rate": 1.827109914549924e-07, "logits/chosen": -3.1713786125183105, "logits/rejected": -3.632185220718384, "logps/chosen": -171.10513305664062, "logps/rejected": -350.1403503417969, "loss": 0.1547, "rewards/accuracies": 1.0, "rewards/chosen": 0.6436076164245605, "rewards/margins": 3.031108856201172, "rewards/rejected": -2.3875010013580322, "step": 3471 }, { "epoch": 0.4, "learning_rate": 1.8267587498536813e-07, "logits/chosen": -3.0028862953186035, "logits/rejected": -2.6542558670043945, "logps/chosen": -405.6163330078125, "logps/rejected": -358.9677429199219, "loss": 0.1959, "rewards/accuracies": 1.0, "rewards/chosen": 0.5053099393844604, "rewards/margins": 2.334343194961548, "rewards/rejected": -1.8290331363677979, "step": 3472 }, { "epoch": 0.4, "learning_rate": 1.8264075851574388e-07, "logits/chosen": -2.9319562911987305, "logits/rejected": -2.919583559036255, "logps/chosen": -287.0363464355469, "logps/rejected": -248.58811950683594, "loss": 0.1516, "rewards/accuracies": 1.0, "rewards/chosen": 0.28877294063568115, "rewards/margins": 2.155003070831299, "rewards/rejected": -1.8662300109863281, "step": 3473 }, { "epoch": 0.4, "learning_rate": 1.8260564204611963e-07, "logits/chosen": -3.0969252586364746, "logits/rejected": -2.9206998348236084, "logps/chosen": -323.7716064453125, "logps/rejected": -247.12319946289062, "loss": 0.2542, "rewards/accuracies": 0.875, "rewards/chosen": 0.4837474524974823, "rewards/margins": 2.381845474243164, "rewards/rejected": -1.898098111152649, "step": 3474 }, { "epoch": 0.4, "learning_rate": 1.8257052557649536e-07, "logits/chosen": -2.482708215713501, "logits/rejected": -2.549679756164551, "logps/chosen": -498.50634765625, "logps/rejected": -556.5753784179688, "loss": 0.3497, "rewards/accuracies": 1.0, "rewards/chosen": -0.04774561524391174, "rewards/margins": 2.6559560298919678, "rewards/rejected": -2.7037017345428467, "step": 3475 }, { "epoch": 0.4, "learning_rate": 1.8253540910687111e-07, "logits/chosen": -3.0582573413848877, "logits/rejected": -2.9603753089904785, "logps/chosen": -309.6461181640625, "logps/rejected": -296.5096130371094, "loss": 0.4161, "rewards/accuracies": 0.75, "rewards/chosen": 0.07479983568191528, "rewards/margins": 1.4507852792739868, "rewards/rejected": -1.3759855031967163, "step": 3476 }, { "epoch": 0.4, "learning_rate": 1.8250029263724684e-07, "logits/chosen": -3.2805378437042236, "logits/rejected": -3.347604274749756, "logps/chosen": -276.556884765625, "logps/rejected": -318.2950134277344, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": 0.5710178017616272, "rewards/margins": 2.6123299598693848, "rewards/rejected": -2.0413122177124023, "step": 3477 }, { "epoch": 0.4, "learning_rate": 1.824651761676226e-07, "logits/chosen": -3.6501729488372803, "logits/rejected": -3.6750926971435547, "logps/chosen": -183.37741088867188, "logps/rejected": -213.5255889892578, "loss": 0.4588, "rewards/accuracies": 0.75, "rewards/chosen": -0.5190547704696655, "rewards/margins": 1.7534277439117432, "rewards/rejected": -2.2724826335906982, "step": 3478 }, { "epoch": 0.4, "learning_rate": 1.8243005969799835e-07, "logits/chosen": -3.0983777046203613, "logits/rejected": -3.025247097015381, "logps/chosen": -178.22421264648438, "logps/rejected": -174.6618194580078, "loss": 0.6634, "rewards/accuracies": 0.5, "rewards/chosen": -0.31720882654190063, "rewards/margins": 0.32789894938468933, "rewards/rejected": -0.6451078057289124, "step": 3479 }, { "epoch": 0.4, "learning_rate": 1.8239494322837408e-07, "logits/chosen": -3.4038310050964355, "logits/rejected": -3.725478410720825, "logps/chosen": -394.9437255859375, "logps/rejected": -322.6715087890625, "loss": 0.2465, "rewards/accuracies": 0.875, "rewards/chosen": -0.07618457078933716, "rewards/margins": 1.709977626800537, "rewards/rejected": -1.7861621379852295, "step": 3480 }, { "epoch": 0.4, "learning_rate": 1.8235982675874986e-07, "logits/chosen": -2.801156997680664, "logits/rejected": -2.628204345703125, "logps/chosen": -257.4753723144531, "logps/rejected": -278.3416442871094, "loss": 0.1718, "rewards/accuracies": 1.0, "rewards/chosen": 0.2723209857940674, "rewards/margins": 2.5469841957092285, "rewards/rejected": -2.274663209915161, "step": 3481 }, { "epoch": 0.4, "learning_rate": 1.823247102891256e-07, "logits/chosen": -2.965620517730713, "logits/rejected": -3.482375144958496, "logps/chosen": -191.66055297851562, "logps/rejected": -169.75970458984375, "loss": 0.8041, "rewards/accuracies": 0.625, "rewards/chosen": -0.432248055934906, "rewards/margins": 0.6079539656639099, "rewards/rejected": -1.040202021598816, "step": 3482 }, { "epoch": 0.4, "learning_rate": 1.8228959381950134e-07, "logits/chosen": -2.315192222595215, "logits/rejected": -2.3022992610931396, "logps/chosen": -547.8173217773438, "logps/rejected": -330.9700622558594, "loss": 0.4336, "rewards/accuracies": 0.75, "rewards/chosen": 0.7364358901977539, "rewards/margins": 1.5207734107971191, "rewards/rejected": -0.78433758020401, "step": 3483 }, { "epoch": 0.4, "learning_rate": 1.822544773498771e-07, "logits/chosen": -3.3228416442871094, "logits/rejected": -3.421426296234131, "logps/chosen": -323.55810546875, "logps/rejected": -186.70529174804688, "loss": 0.2926, "rewards/accuracies": 0.875, "rewards/chosen": -0.07875534892082214, "rewards/margins": 1.877289056777954, "rewards/rejected": -1.956044316291809, "step": 3484 }, { "epoch": 0.4, "learning_rate": 1.8221936088025282e-07, "logits/chosen": -2.8166258335113525, "logits/rejected": -3.010793685913086, "logps/chosen": -326.30487060546875, "logps/rejected": -313.98291015625, "loss": 0.6684, "rewards/accuracies": 0.625, "rewards/chosen": -0.2543906271457672, "rewards/margins": 0.664979875087738, "rewards/rejected": -0.9193704724311829, "step": 3485 }, { "epoch": 0.4, "learning_rate": 1.8218424441062857e-07, "logits/chosen": -3.0913450717926025, "logits/rejected": -3.230609893798828, "logps/chosen": -205.50894165039062, "logps/rejected": -176.6011505126953, "loss": 0.5844, "rewards/accuracies": 0.625, "rewards/chosen": -0.4192887842655182, "rewards/margins": 1.156830072402954, "rewards/rejected": -1.5761187076568604, "step": 3486 }, { "epoch": 0.4, "learning_rate": 1.8214912794100433e-07, "logits/chosen": -3.2586112022399902, "logits/rejected": -3.238257884979248, "logps/chosen": -252.6163787841797, "logps/rejected": -182.692138671875, "loss": 0.3995, "rewards/accuracies": 0.875, "rewards/chosen": -0.03619527816772461, "rewards/margins": 1.1175763607025146, "rewards/rejected": -1.1537716388702393, "step": 3487 }, { "epoch": 0.4, "learning_rate": 1.8211401147138005e-07, "logits/chosen": -3.536797046661377, "logits/rejected": -3.7977352142333984, "logps/chosen": -130.3854217529297, "logps/rejected": -238.85816955566406, "loss": 0.3964, "rewards/accuracies": 0.75, "rewards/chosen": 0.6975823044776917, "rewards/margins": 1.4764883518218994, "rewards/rejected": -0.778905987739563, "step": 3488 }, { "epoch": 0.4, "learning_rate": 1.820788950017558e-07, "logits/chosen": -2.8117165565490723, "logits/rejected": -2.8562541007995605, "logps/chosen": -226.33157348632812, "logps/rejected": -223.89236450195312, "loss": 0.5455, "rewards/accuracies": 0.75, "rewards/chosen": -0.3207423985004425, "rewards/margins": 1.2906781435012817, "rewards/rejected": -1.6114205121994019, "step": 3489 }, { "epoch": 0.4, "learning_rate": 1.820437785321316e-07, "logits/chosen": -2.6005659103393555, "logits/rejected": -2.570387125015259, "logps/chosen": -155.2984161376953, "logps/rejected": -155.80609130859375, "loss": 0.2867, "rewards/accuracies": 1.0, "rewards/chosen": 0.1323653757572174, "rewards/margins": 1.7298380136489868, "rewards/rejected": -1.5974726676940918, "step": 3490 }, { "epoch": 0.4, "learning_rate": 1.820086620625073e-07, "logits/chosen": -3.620378017425537, "logits/rejected": -3.3464744091033936, "logps/chosen": -167.00424194335938, "logps/rejected": -125.95811462402344, "loss": 0.459, "rewards/accuracies": 0.75, "rewards/chosen": -0.4857735335826874, "rewards/margins": 0.8232100009918213, "rewards/rejected": -1.308983564376831, "step": 3491 }, { "epoch": 0.4, "learning_rate": 1.8197354559288307e-07, "logits/chosen": -3.0018129348754883, "logits/rejected": -2.81488037109375, "logps/chosen": -274.6968994140625, "logps/rejected": -263.5760803222656, "loss": 0.2185, "rewards/accuracies": 1.0, "rewards/chosen": 0.04472966492176056, "rewards/margins": 2.2247681617736816, "rewards/rejected": -2.1800384521484375, "step": 3492 }, { "epoch": 0.4, "learning_rate": 1.819384291232588e-07, "logits/chosen": -3.2814111709594727, "logits/rejected": -3.0376269817352295, "logps/chosen": -334.3322448730469, "logps/rejected": -227.94834899902344, "loss": 0.5195, "rewards/accuracies": 0.625, "rewards/chosen": -0.14294324815273285, "rewards/margins": 0.8109804391860962, "rewards/rejected": -0.953923761844635, "step": 3493 }, { "epoch": 0.4, "learning_rate": 1.8190331265363455e-07, "logits/chosen": -3.5348424911499023, "logits/rejected": -3.389540910720825, "logps/chosen": -227.6029052734375, "logps/rejected": -176.49974060058594, "loss": 0.3104, "rewards/accuracies": 0.875, "rewards/chosen": 0.6147162914276123, "rewards/margins": 1.8974261283874512, "rewards/rejected": -1.2827098369598389, "step": 3494 }, { "epoch": 0.4, "learning_rate": 1.818681961840103e-07, "logits/chosen": -3.398228168487549, "logits/rejected": -3.0416505336761475, "logps/chosen": -181.53073120117188, "logps/rejected": -218.158203125, "loss": 0.7362, "rewards/accuracies": 0.375, "rewards/chosen": -0.5922834873199463, "rewards/margins": 0.39503419399261475, "rewards/rejected": -0.987317681312561, "step": 3495 }, { "epoch": 0.4, "learning_rate": 1.8183307971438603e-07, "logits/chosen": -3.3147096633911133, "logits/rejected": -3.261153221130371, "logps/chosen": -199.16162109375, "logps/rejected": -177.08108520507812, "loss": 0.5358, "rewards/accuracies": 0.75, "rewards/chosen": -0.3324744701385498, "rewards/margins": 1.4164992570877075, "rewards/rejected": -1.7489736080169678, "step": 3496 }, { "epoch": 0.4, "learning_rate": 1.8179796324476179e-07, "logits/chosen": -2.738931894302368, "logits/rejected": -2.635317802429199, "logps/chosen": -293.00030517578125, "logps/rejected": -207.85572814941406, "loss": 0.5644, "rewards/accuracies": 0.75, "rewards/chosen": 0.2505229413509369, "rewards/margins": 0.7619376182556152, "rewards/rejected": -0.511414647102356, "step": 3497 }, { "epoch": 0.4, "learning_rate": 1.8176284677513754e-07, "logits/chosen": -2.78204345703125, "logits/rejected": -2.776705503463745, "logps/chosen": -190.83224487304688, "logps/rejected": -174.32786560058594, "loss": 0.5097, "rewards/accuracies": 0.75, "rewards/chosen": -0.360834538936615, "rewards/margins": 0.8450148105621338, "rewards/rejected": -1.2058494091033936, "step": 3498 }, { "epoch": 0.4, "learning_rate": 1.8172773030551327e-07, "logits/chosen": -3.341421604156494, "logits/rejected": -3.09098219871521, "logps/chosen": -231.8680419921875, "logps/rejected": -219.88687133789062, "loss": 0.3338, "rewards/accuracies": 0.875, "rewards/chosen": -0.10879823565483093, "rewards/margins": 1.5189762115478516, "rewards/rejected": -1.6277744770050049, "step": 3499 }, { "epoch": 0.4, "learning_rate": 1.8169261383588902e-07, "logits/chosen": -3.3972723484039307, "logits/rejected": -3.31160044670105, "logps/chosen": -203.02529907226562, "logps/rejected": -124.50965118408203, "loss": 0.5684, "rewards/accuracies": 0.5, "rewards/chosen": -0.7365705966949463, "rewards/margins": 0.7156797647476196, "rewards/rejected": -1.452250361442566, "step": 3500 }, { "epoch": 0.4, "learning_rate": 1.8165749736626475e-07, "logits/chosen": -2.3249521255493164, "logits/rejected": -2.3845362663269043, "logps/chosen": -295.1923828125, "logps/rejected": -236.73013305664062, "loss": 0.4975, "rewards/accuracies": 0.75, "rewards/chosen": -0.6185683608055115, "rewards/margins": 1.1536791324615479, "rewards/rejected": -1.7722474336624146, "step": 3501 }, { "epoch": 0.4, "learning_rate": 1.816223808966405e-07, "logits/chosen": -2.95945405960083, "logits/rejected": -3.520936965942383, "logps/chosen": -161.14306640625, "logps/rejected": -253.6451416015625, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": 0.2552975118160248, "rewards/margins": 3.4290122985839844, "rewards/rejected": -3.1737148761749268, "step": 3502 }, { "epoch": 0.4, "learning_rate": 1.8158726442701628e-07, "logits/chosen": -2.876594066619873, "logits/rejected": -2.8193209171295166, "logps/chosen": -261.20098876953125, "logps/rejected": -282.892333984375, "loss": 0.5641, "rewards/accuracies": 0.5, "rewards/chosen": -0.3670169711112976, "rewards/margins": 0.5241342782974243, "rewards/rejected": -0.8911513090133667, "step": 3503 }, { "epoch": 0.4, "learning_rate": 1.81552147957392e-07, "logits/chosen": -2.8715567588806152, "logits/rejected": -2.938143730163574, "logps/chosen": -240.77210998535156, "logps/rejected": -131.92938232421875, "loss": 0.3856, "rewards/accuracies": 0.75, "rewards/chosen": -0.3410758674144745, "rewards/margins": 1.1967374086380005, "rewards/rejected": -1.5378131866455078, "step": 3504 }, { "epoch": 0.4, "learning_rate": 1.8151703148776776e-07, "logits/chosen": -2.33023738861084, "logits/rejected": -2.721149206161499, "logps/chosen": -314.65509033203125, "logps/rejected": -266.1240234375, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": -0.0778169259428978, "rewards/margins": 1.386824607849121, "rewards/rejected": -1.4646415710449219, "step": 3505 }, { "epoch": 0.4, "learning_rate": 1.8148191501814352e-07, "logits/chosen": -3.210291862487793, "logits/rejected": -3.298276424407959, "logps/chosen": -231.75164794921875, "logps/rejected": -184.5130615234375, "loss": 0.3288, "rewards/accuracies": 0.75, "rewards/chosen": 0.4118233919143677, "rewards/margins": 2.3243932723999023, "rewards/rejected": -1.9125698804855347, "step": 3506 }, { "epoch": 0.4, "learning_rate": 1.8144679854851925e-07, "logits/chosen": -3.4119420051574707, "logits/rejected": -3.175459384918213, "logps/chosen": -377.87127685546875, "logps/rejected": -238.89112854003906, "loss": 0.3367, "rewards/accuracies": 0.875, "rewards/chosen": -0.22446171939373016, "rewards/margins": 1.336361289024353, "rewards/rejected": -1.5608230829238892, "step": 3507 }, { "epoch": 0.4, "learning_rate": 1.81411682078895e-07, "logits/chosen": -3.3921639919281006, "logits/rejected": -3.4882702827453613, "logps/chosen": -272.9784851074219, "logps/rejected": -405.99853515625, "loss": 0.6273, "rewards/accuracies": 0.625, "rewards/chosen": 0.009888291358947754, "rewards/margins": 1.6388428211212158, "rewards/rejected": -1.6289546489715576, "step": 3508 }, { "epoch": 0.4, "learning_rate": 1.8137656560927073e-07, "logits/chosen": -3.625364065170288, "logits/rejected": -3.4376864433288574, "logps/chosen": -154.8473358154297, "logps/rejected": -182.18690490722656, "loss": 0.3963, "rewards/accuracies": 0.875, "rewards/chosen": -0.024679839611053467, "rewards/margins": 1.5191617012023926, "rewards/rejected": -1.5438413619995117, "step": 3509 }, { "epoch": 0.4, "learning_rate": 1.8134144913964648e-07, "logits/chosen": -3.633683919906616, "logits/rejected": -3.8273935317993164, "logps/chosen": -275.23907470703125, "logps/rejected": -370.685546875, "loss": 0.6129, "rewards/accuracies": 0.625, "rewards/chosen": -0.32291507720947266, "rewards/margins": 0.8851771354675293, "rewards/rejected": -1.208092212677002, "step": 3510 }, { "epoch": 0.4, "learning_rate": 1.8130633267002223e-07, "logits/chosen": -2.595123767852783, "logits/rejected": -2.3078722953796387, "logps/chosen": -302.73455810546875, "logps/rejected": -248.29824829101562, "loss": 0.527, "rewards/accuracies": 0.625, "rewards/chosen": -0.2803901433944702, "rewards/margins": 0.771223247051239, "rewards/rejected": -1.051613450050354, "step": 3511 }, { "epoch": 0.4, "learning_rate": 1.8127121620039796e-07, "logits/chosen": -2.610584259033203, "logits/rejected": -2.9112067222595215, "logps/chosen": -340.950927734375, "logps/rejected": -282.0301513671875, "loss": 0.4101, "rewards/accuracies": 0.875, "rewards/chosen": 0.2517627477645874, "rewards/margins": 1.2492716312408447, "rewards/rejected": -0.9975088238716125, "step": 3512 }, { "epoch": 0.4, "learning_rate": 1.8123609973077372e-07, "logits/chosen": -3.429264545440674, "logits/rejected": -3.6341028213500977, "logps/chosen": -149.20033264160156, "logps/rejected": -275.7229309082031, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": 0.17710228264331818, "rewards/margins": 2.8497495651245117, "rewards/rejected": -2.672646999359131, "step": 3513 }, { "epoch": 0.41, "learning_rate": 1.812009832611495e-07, "logits/chosen": -3.748776435852051, "logits/rejected": -3.8666112422943115, "logps/chosen": -166.33421325683594, "logps/rejected": -190.65948486328125, "loss": 0.6712, "rewards/accuracies": 0.75, "rewards/chosen": -0.5440917015075684, "rewards/margins": 0.28966575860977173, "rewards/rejected": -0.8337573409080505, "step": 3514 }, { "epoch": 0.41, "learning_rate": 1.8116586679152522e-07, "logits/chosen": -2.9627506732940674, "logits/rejected": -3.121913433074951, "logps/chosen": -291.54486083984375, "logps/rejected": -150.74749755859375, "loss": 0.5496, "rewards/accuracies": 0.5, "rewards/chosen": -0.7176952958106995, "rewards/margins": 0.8127733469009399, "rewards/rejected": -1.5304685831069946, "step": 3515 }, { "epoch": 0.41, "learning_rate": 1.8113075032190098e-07, "logits/chosen": -3.2746124267578125, "logits/rejected": -3.2562255859375, "logps/chosen": -131.16357421875, "logps/rejected": -203.6732177734375, "loss": 0.3891, "rewards/accuracies": 0.875, "rewards/chosen": -0.024155285209417343, "rewards/margins": 1.4734981060028076, "rewards/rejected": -1.4976534843444824, "step": 3516 }, { "epoch": 0.41, "learning_rate": 1.810956338522767e-07, "logits/chosen": -3.247891902923584, "logits/rejected": -3.107728958129883, "logps/chosen": -262.46990966796875, "logps/rejected": -263.36395263671875, "loss": 0.4809, "rewards/accuracies": 0.75, "rewards/chosen": -0.23728737235069275, "rewards/margins": 1.7863588333129883, "rewards/rejected": -2.023646116256714, "step": 3517 }, { "epoch": 0.41, "learning_rate": 1.8106051738265246e-07, "logits/chosen": -3.0262341499328613, "logits/rejected": -2.7744410037994385, "logps/chosen": -179.73501586914062, "logps/rejected": -331.4676208496094, "loss": 0.3731, "rewards/accuracies": 0.625, "rewards/chosen": -0.3168907165527344, "rewards/margins": 2.8446624279022217, "rewards/rejected": -3.161553144454956, "step": 3518 }, { "epoch": 0.41, "learning_rate": 1.810254009130282e-07, "logits/chosen": -3.222296714782715, "logits/rejected": -3.4267783164978027, "logps/chosen": -304.1332702636719, "logps/rejected": -256.58868408203125, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": 0.4557272791862488, "rewards/margins": 3.2894203662872314, "rewards/rejected": -2.833693027496338, "step": 3519 }, { "epoch": 0.41, "learning_rate": 1.8099028444340394e-07, "logits/chosen": -3.3049049377441406, "logits/rejected": -3.351945400238037, "logps/chosen": -295.232177734375, "logps/rejected": -168.79864501953125, "loss": 0.3549, "rewards/accuracies": 0.625, "rewards/chosen": 0.351114422082901, "rewards/margins": 1.7252130508422852, "rewards/rejected": -1.374098539352417, "step": 3520 }, { "epoch": 0.41, "learning_rate": 1.809551679737797e-07, "logits/chosen": -3.6326072216033936, "logits/rejected": -3.6233863830566406, "logps/chosen": -184.19166564941406, "logps/rejected": -251.66482543945312, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": 0.13720515370368958, "rewards/margins": 2.3914060592651367, "rewards/rejected": -2.2542009353637695, "step": 3521 }, { "epoch": 0.41, "learning_rate": 1.8092005150415542e-07, "logits/chosen": -3.7643675804138184, "logits/rejected": -3.722980499267578, "logps/chosen": -172.59310913085938, "logps/rejected": -228.09353637695312, "loss": 0.196, "rewards/accuracies": 1.0, "rewards/chosen": 0.20166811347007751, "rewards/margins": 3.226357936859131, "rewards/rejected": -3.0246901512145996, "step": 3522 }, { "epoch": 0.41, "learning_rate": 1.8088493503453117e-07, "logits/chosen": -3.0340662002563477, "logits/rejected": -3.3380446434020996, "logps/chosen": -189.09100341796875, "logps/rejected": -273.3052978515625, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": 0.44025278091430664, "rewards/margins": 3.1986234188079834, "rewards/rejected": -2.758370876312256, "step": 3523 }, { "epoch": 0.41, "learning_rate": 1.8084981856490696e-07, "logits/chosen": -3.426180839538574, "logits/rejected": -2.935577869415283, "logps/chosen": -259.528076171875, "logps/rejected": -346.6033020019531, "loss": 0.4706, "rewards/accuracies": 0.75, "rewards/chosen": -0.4800933599472046, "rewards/margins": 0.9394630789756775, "rewards/rejected": -1.4195563793182373, "step": 3524 }, { "epoch": 0.41, "learning_rate": 1.8081470209528266e-07, "logits/chosen": -2.9008960723876953, "logits/rejected": -2.846498489379883, "logps/chosen": -263.61151123046875, "logps/rejected": -349.6339416503906, "loss": 0.8014, "rewards/accuracies": 0.625, "rewards/chosen": -0.2338694930076599, "rewards/margins": 0.1784113198518753, "rewards/rejected": -0.412280797958374, "step": 3525 }, { "epoch": 0.41, "learning_rate": 1.8077958562565844e-07, "logits/chosen": -3.256047248840332, "logits/rejected": -3.243262767791748, "logps/chosen": -268.6332092285156, "logps/rejected": -332.2796936035156, "loss": 0.3, "rewards/accuracies": 0.875, "rewards/chosen": 0.3255889415740967, "rewards/margins": 2.0704267024993896, "rewards/rejected": -1.744837760925293, "step": 3526 }, { "epoch": 0.41, "learning_rate": 1.807444691560342e-07, "logits/chosen": -2.972883939743042, "logits/rejected": -3.1339945793151855, "logps/chosen": -96.5503158569336, "logps/rejected": -188.99761962890625, "loss": 0.5467, "rewards/accuracies": 0.75, "rewards/chosen": 0.025065936148166656, "rewards/margins": 0.5969998240470886, "rewards/rejected": -0.5719338655471802, "step": 3527 }, { "epoch": 0.41, "learning_rate": 1.8070935268640992e-07, "logits/chosen": -3.1618940830230713, "logits/rejected": -2.9313883781433105, "logps/chosen": -312.5709228515625, "logps/rejected": -298.4981689453125, "loss": 0.8775, "rewards/accuracies": 0.5, "rewards/chosen": -0.8528946042060852, "rewards/margins": 0.6155151128768921, "rewards/rejected": -1.4684096574783325, "step": 3528 }, { "epoch": 0.41, "learning_rate": 1.8067423621678567e-07, "logits/chosen": -2.730638027191162, "logits/rejected": -2.5746774673461914, "logps/chosen": -334.9827575683594, "logps/rejected": -292.0713806152344, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": 0.5346776843070984, "rewards/margins": 1.5166630744934082, "rewards/rejected": -0.9819853901863098, "step": 3529 }, { "epoch": 0.41, "learning_rate": 1.806391197471614e-07, "logits/chosen": -2.695577621459961, "logits/rejected": -2.7087061405181885, "logps/chosen": -305.7821960449219, "logps/rejected": -233.46151733398438, "loss": 0.2069, "rewards/accuracies": 1.0, "rewards/chosen": 0.1624404489994049, "rewards/margins": 2.9837310314178467, "rewards/rejected": -2.8212904930114746, "step": 3530 }, { "epoch": 0.41, "learning_rate": 1.8060400327753715e-07, "logits/chosen": -3.1361608505249023, "logits/rejected": -2.916029453277588, "logps/chosen": -320.7582092285156, "logps/rejected": -185.6348419189453, "loss": 0.3569, "rewards/accuracies": 0.875, "rewards/chosen": 0.040156908333301544, "rewards/margins": 1.3215934038162231, "rewards/rejected": -1.281436562538147, "step": 3531 }, { "epoch": 0.41, "learning_rate": 1.805688868079129e-07, "logits/chosen": -2.852931022644043, "logits/rejected": -2.800537109375, "logps/chosen": -257.8814392089844, "logps/rejected": -314.03094482421875, "loss": 0.5626, "rewards/accuracies": 0.75, "rewards/chosen": -0.04305630177259445, "rewards/margins": 1.6780409812927246, "rewards/rejected": -1.7210972309112549, "step": 3532 }, { "epoch": 0.41, "learning_rate": 1.8053377033828863e-07, "logits/chosen": -3.238154888153076, "logits/rejected": -3.3996310234069824, "logps/chosen": -310.34698486328125, "logps/rejected": -235.49856567382812, "loss": 0.2456, "rewards/accuracies": 1.0, "rewards/chosen": 0.6982054710388184, "rewards/margins": 2.295017719268799, "rewards/rejected": -1.5968122482299805, "step": 3533 }, { "epoch": 0.41, "learning_rate": 1.804986538686644e-07, "logits/chosen": -2.837984800338745, "logits/rejected": -3.119460105895996, "logps/chosen": -267.6942138671875, "logps/rejected": -272.9362487792969, "loss": 0.2073, "rewards/accuracies": 0.875, "rewards/chosen": -0.07215878367424011, "rewards/margins": 2.4345455169677734, "rewards/rejected": -2.506704330444336, "step": 3534 }, { "epoch": 0.41, "learning_rate": 1.8046353739904017e-07, "logits/chosen": -2.6964151859283447, "logits/rejected": -2.881702184677124, "logps/chosen": -341.4447021484375, "logps/rejected": -274.589111328125, "loss": 0.3022, "rewards/accuracies": 0.875, "rewards/chosen": 0.6412876844406128, "rewards/margins": 1.657568097114563, "rewards/rejected": -1.0162804126739502, "step": 3535 }, { "epoch": 0.41, "learning_rate": 1.8042842092941587e-07, "logits/chosen": -2.972026824951172, "logits/rejected": -3.1504030227661133, "logps/chosen": -240.920166015625, "logps/rejected": -268.59295654296875, "loss": 0.5927, "rewards/accuracies": 0.625, "rewards/chosen": -0.038548290729522705, "rewards/margins": 0.7191018462181091, "rewards/rejected": -0.7576501369476318, "step": 3536 }, { "epoch": 0.41, "learning_rate": 1.8039330445979165e-07, "logits/chosen": -2.6403419971466064, "logits/rejected": -2.7194504737854004, "logps/chosen": -320.1580505371094, "logps/rejected": -171.84414672851562, "loss": 0.6675, "rewards/accuracies": 0.75, "rewards/chosen": -0.3283534646034241, "rewards/margins": 0.5059452652931213, "rewards/rejected": -0.8342987298965454, "step": 3537 }, { "epoch": 0.41, "learning_rate": 1.8035818799016738e-07, "logits/chosen": -2.829660415649414, "logits/rejected": -3.1425180435180664, "logps/chosen": -297.35687255859375, "logps/rejected": -259.4021301269531, "loss": 0.5342, "rewards/accuracies": 0.875, "rewards/chosen": -0.4285831153392792, "rewards/margins": 0.7417243123054504, "rewards/rejected": -1.1703073978424072, "step": 3538 }, { "epoch": 0.41, "learning_rate": 1.8032307152054313e-07, "logits/chosen": -3.6579337120056152, "logits/rejected": -3.167973518371582, "logps/chosen": -269.94598388671875, "logps/rejected": -185.7836151123047, "loss": 0.4202, "rewards/accuracies": 0.75, "rewards/chosen": 0.14276990294456482, "rewards/margins": 2.0528042316436768, "rewards/rejected": -1.910034418106079, "step": 3539 }, { "epoch": 0.41, "learning_rate": 1.8028795505091888e-07, "logits/chosen": -3.599830150604248, "logits/rejected": -3.450124502182007, "logps/chosen": -171.15365600585938, "logps/rejected": -245.3795166015625, "loss": 0.4425, "rewards/accuracies": 0.75, "rewards/chosen": 0.07464843988418579, "rewards/margins": 1.3255510330200195, "rewards/rejected": -1.250902533531189, "step": 3540 }, { "epoch": 0.41, "learning_rate": 1.802528385812946e-07, "logits/chosen": -3.2925190925598145, "logits/rejected": -3.050676107406616, "logps/chosen": -186.185791015625, "logps/rejected": -280.36651611328125, "loss": 0.4658, "rewards/accuracies": 0.875, "rewards/chosen": -0.28264689445495605, "rewards/margins": 1.1117125749588013, "rewards/rejected": -1.3943595886230469, "step": 3541 }, { "epoch": 0.41, "learning_rate": 1.8021772211167037e-07, "logits/chosen": -3.0572071075439453, "logits/rejected": -2.9025843143463135, "logps/chosen": -325.84466552734375, "logps/rejected": -357.8912353515625, "loss": 0.4736, "rewards/accuracies": 0.875, "rewards/chosen": 0.2489316463470459, "rewards/margins": 0.8641751408576965, "rewards/rejected": -0.6152435541152954, "step": 3542 }, { "epoch": 0.41, "learning_rate": 1.8018260564204612e-07, "logits/chosen": -3.3201866149902344, "logits/rejected": -3.6926050186157227, "logps/chosen": -180.25425720214844, "logps/rejected": -418.7351989746094, "loss": 0.1679, "rewards/accuracies": 1.0, "rewards/chosen": -0.24111074209213257, "rewards/margins": 4.011889934539795, "rewards/rejected": -4.253000736236572, "step": 3543 }, { "epoch": 0.41, "learning_rate": 1.8014748917242185e-07, "logits/chosen": -3.4852776527404785, "logits/rejected": -3.5088112354278564, "logps/chosen": -225.91748046875, "logps/rejected": -394.4898681640625, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": 0.9017502069473267, "rewards/margins": 4.778542995452881, "rewards/rejected": -3.8767926692962646, "step": 3544 }, { "epoch": 0.41, "learning_rate": 1.801123727027976e-07, "logits/chosen": -3.647865056991577, "logits/rejected": -3.666170597076416, "logps/chosen": -198.337646484375, "logps/rejected": -202.23406982421875, "loss": 0.1704, "rewards/accuracies": 0.875, "rewards/chosen": 0.2849385738372803, "rewards/margins": 2.9037084579467773, "rewards/rejected": -2.618769884109497, "step": 3545 }, { "epoch": 0.41, "learning_rate": 1.8007725623317333e-07, "logits/chosen": -3.5994811058044434, "logits/rejected": -3.402290105819702, "logps/chosen": -493.35211181640625, "logps/rejected": -249.50746154785156, "loss": 0.2979, "rewards/accuracies": 0.75, "rewards/chosen": 0.03033638373017311, "rewards/margins": 2.3456578254699707, "rewards/rejected": -2.315321445465088, "step": 3546 }, { "epoch": 0.41, "learning_rate": 1.8004213976354908e-07, "logits/chosen": -3.00870680809021, "logits/rejected": -2.7908101081848145, "logps/chosen": -261.6490478515625, "logps/rejected": -243.97512817382812, "loss": 0.5655, "rewards/accuracies": 0.625, "rewards/chosen": -0.2533054053783417, "rewards/margins": 1.5803775787353516, "rewards/rejected": -1.8336832523345947, "step": 3547 }, { "epoch": 0.41, "learning_rate": 1.8000702329392486e-07, "logits/chosen": -2.3727807998657227, "logits/rejected": -2.5971624851226807, "logps/chosen": -340.977783203125, "logps/rejected": -287.73504638671875, "loss": 0.3063, "rewards/accuracies": 0.875, "rewards/chosen": 0.3282092213630676, "rewards/margins": 1.6488394737243652, "rewards/rejected": -1.3206300735473633, "step": 3548 }, { "epoch": 0.41, "learning_rate": 1.799719068243006e-07, "logits/chosen": -3.045339584350586, "logits/rejected": -3.395671844482422, "logps/chosen": -314.1576843261719, "logps/rejected": -296.9385986328125, "loss": 0.8336, "rewards/accuracies": 0.625, "rewards/chosen": -0.671963632106781, "rewards/margins": 2.199672222137451, "rewards/rejected": -2.8716354370117188, "step": 3549 }, { "epoch": 0.41, "learning_rate": 1.7993679035467634e-07, "logits/chosen": -3.4149508476257324, "logits/rejected": -3.393261432647705, "logps/chosen": -181.16526794433594, "logps/rejected": -187.52818298339844, "loss": 0.3526, "rewards/accuracies": 0.875, "rewards/chosen": 0.8653045296669006, "rewards/margins": 2.1019089221954346, "rewards/rejected": -1.2366042137145996, "step": 3550 }, { "epoch": 0.41, "learning_rate": 1.799016738850521e-07, "logits/chosen": -3.4932665824890137, "logits/rejected": -3.396778106689453, "logps/chosen": -445.5292053222656, "logps/rejected": -223.22637939453125, "loss": 0.2947, "rewards/accuracies": 0.875, "rewards/chosen": -0.12282834202051163, "rewards/margins": 1.9155761003494263, "rewards/rejected": -2.0384044647216797, "step": 3551 }, { "epoch": 0.41, "learning_rate": 1.7986655741542782e-07, "logits/chosen": -3.0920093059539795, "logits/rejected": -2.52260160446167, "logps/chosen": -480.3854064941406, "logps/rejected": -315.7304382324219, "loss": 0.5029, "rewards/accuracies": 0.75, "rewards/chosen": -0.2985793948173523, "rewards/margins": 0.8601231575012207, "rewards/rejected": -1.1587024927139282, "step": 3552 }, { "epoch": 0.41, "learning_rate": 1.7983144094580358e-07, "logits/chosen": -3.444403886795044, "logits/rejected": -3.4472296237945557, "logps/chosen": -244.19346618652344, "logps/rejected": -232.25140380859375, "loss": 0.5606, "rewards/accuracies": 0.75, "rewards/chosen": 0.0017276853322982788, "rewards/margins": 0.7963016629219055, "rewards/rejected": -0.794573962688446, "step": 3553 }, { "epoch": 0.41, "learning_rate": 1.797963244761793e-07, "logits/chosen": -3.444579601287842, "logits/rejected": -3.6395111083984375, "logps/chosen": -266.3729248046875, "logps/rejected": -278.0374755859375, "loss": 0.2473, "rewards/accuracies": 0.875, "rewards/chosen": 0.19648362696170807, "rewards/margins": 1.9721672534942627, "rewards/rejected": -1.7756834030151367, "step": 3554 }, { "epoch": 0.41, "learning_rate": 1.7976120800655506e-07, "logits/chosen": -3.6600799560546875, "logits/rejected": -3.525402307510376, "logps/chosen": -378.540283203125, "logps/rejected": -248.02169799804688, "loss": 0.5229, "rewards/accuracies": 0.625, "rewards/chosen": 0.1571844518184662, "rewards/margins": 1.1672313213348389, "rewards/rejected": -1.0100469589233398, "step": 3555 }, { "epoch": 0.41, "learning_rate": 1.7972609153693081e-07, "logits/chosen": -3.34706711769104, "logits/rejected": -3.369274854660034, "logps/chosen": -157.0826416015625, "logps/rejected": -130.1790771484375, "loss": 0.6179, "rewards/accuracies": 0.5, "rewards/chosen": -0.29796284437179565, "rewards/margins": 0.8686269521713257, "rewards/rejected": -1.1665897369384766, "step": 3556 }, { "epoch": 0.41, "learning_rate": 1.7969097506730654e-07, "logits/chosen": -3.1978306770324707, "logits/rejected": -3.237532138824463, "logps/chosen": -383.8458251953125, "logps/rejected": -388.5544738769531, "loss": 0.362, "rewards/accuracies": 0.75, "rewards/chosen": 0.05847259610891342, "rewards/margins": 1.5209019184112549, "rewards/rejected": -1.4624292850494385, "step": 3557 }, { "epoch": 0.41, "learning_rate": 1.7965585859768232e-07, "logits/chosen": -2.982764959335327, "logits/rejected": -2.586331844329834, "logps/chosen": -258.25738525390625, "logps/rejected": -280.41949462890625, "loss": 0.612, "rewards/accuracies": 0.75, "rewards/chosen": 0.2329237163066864, "rewards/margins": 1.338122844696045, "rewards/rejected": -1.1051990985870361, "step": 3558 }, { "epoch": 0.41, "learning_rate": 1.7962074212805808e-07, "logits/chosen": -3.6966843605041504, "logits/rejected": -3.742447853088379, "logps/chosen": -301.4953308105469, "logps/rejected": -374.72808837890625, "loss": 0.1556, "rewards/accuracies": 0.875, "rewards/chosen": -0.5347863435745239, "rewards/margins": 2.4585700035095215, "rewards/rejected": -2.993356466293335, "step": 3559 }, { "epoch": 0.41, "learning_rate": 1.795856256584338e-07, "logits/chosen": -3.516669273376465, "logits/rejected": -3.243940591812134, "logps/chosen": -191.81639099121094, "logps/rejected": -162.83828735351562, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": 0.3171611428260803, "rewards/margins": 2.252139091491699, "rewards/rejected": -1.9349777698516846, "step": 3560 }, { "epoch": 0.41, "learning_rate": 1.7955050918880956e-07, "logits/chosen": -3.2200703620910645, "logits/rejected": -2.896397829055786, "logps/chosen": -377.1133117675781, "logps/rejected": -352.603759765625, "loss": 0.7102, "rewards/accuracies": 0.625, "rewards/chosen": -0.009619921445846558, "rewards/margins": 1.5102200508117676, "rewards/rejected": -1.5198400020599365, "step": 3561 }, { "epoch": 0.41, "learning_rate": 1.7951539271918528e-07, "logits/chosen": -2.9030487537384033, "logits/rejected": -3.216236114501953, "logps/chosen": -317.156982421875, "logps/rejected": -229.81228637695312, "loss": 0.649, "rewards/accuracies": 0.625, "rewards/chosen": -0.6754428744316101, "rewards/margins": 1.6627082824707031, "rewards/rejected": -2.338150978088379, "step": 3562 }, { "epoch": 0.41, "learning_rate": 1.7948027624956104e-07, "logits/chosen": -3.3277573585510254, "logits/rejected": -3.6093921661376953, "logps/chosen": -112.35592651367188, "logps/rejected": -235.80731201171875, "loss": 0.764, "rewards/accuracies": 0.625, "rewards/chosen": -0.14807671308517456, "rewards/margins": 1.7362470626831055, "rewards/rejected": -1.8843238353729248, "step": 3563 }, { "epoch": 0.41, "learning_rate": 1.794451597799368e-07, "logits/chosen": -2.754390239715576, "logits/rejected": -2.4703493118286133, "logps/chosen": -148.1443328857422, "logps/rejected": -265.64007568359375, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -0.1317780613899231, "rewards/margins": 1.9318079948425293, "rewards/rejected": -2.0635862350463867, "step": 3564 }, { "epoch": 0.41, "learning_rate": 1.7941004331031252e-07, "logits/chosen": -3.497138738632202, "logits/rejected": -3.274937152862549, "logps/chosen": -155.88491821289062, "logps/rejected": -208.86766052246094, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": 0.34379512071609497, "rewards/margins": 1.904645562171936, "rewards/rejected": -1.5608505010604858, "step": 3565 }, { "epoch": 0.41, "learning_rate": 1.7937492684068827e-07, "logits/chosen": -3.0342555046081543, "logits/rejected": -3.0455422401428223, "logps/chosen": -353.06744384765625, "logps/rejected": -405.1575927734375, "loss": 0.3666, "rewards/accuracies": 0.625, "rewards/chosen": 0.0693933516740799, "rewards/margins": 2.194253921508789, "rewards/rejected": -2.1248605251312256, "step": 3566 }, { "epoch": 0.41, "learning_rate": 1.7933981037106403e-07, "logits/chosen": -2.927114725112915, "logits/rejected": -2.704010009765625, "logps/chosen": -232.83078002929688, "logps/rejected": -276.80255126953125, "loss": 0.5269, "rewards/accuracies": 0.75, "rewards/chosen": 0.0649225041270256, "rewards/margins": 1.9575031995773315, "rewards/rejected": -1.8925807476043701, "step": 3567 }, { "epoch": 0.41, "learning_rate": 1.7930469390143975e-07, "logits/chosen": -3.120628833770752, "logits/rejected": -3.0751068592071533, "logps/chosen": -282.2817687988281, "logps/rejected": -229.83148193359375, "loss": 0.3624, "rewards/accuracies": 0.875, "rewards/chosen": -0.09228713810443878, "rewards/margins": 1.308052897453308, "rewards/rejected": -1.4003400802612305, "step": 3568 }, { "epoch": 0.41, "learning_rate": 1.7926957743181553e-07, "logits/chosen": -3.197434663772583, "logits/rejected": -3.1529765129089355, "logps/chosen": -235.1815185546875, "logps/rejected": -247.06492614746094, "loss": 0.4944, "rewards/accuracies": 0.625, "rewards/chosen": -0.48043420910835266, "rewards/margins": 1.019172191619873, "rewards/rejected": -1.4996063709259033, "step": 3569 }, { "epoch": 0.41, "learning_rate": 1.7923446096219124e-07, "logits/chosen": -3.244213104248047, "logits/rejected": -3.5436325073242188, "logps/chosen": -139.10414123535156, "logps/rejected": -166.80296325683594, "loss": 0.2089, "rewards/accuracies": 1.0, "rewards/chosen": 0.14913733303546906, "rewards/margins": 2.1221837997436523, "rewards/rejected": -1.9730464220046997, "step": 3570 }, { "epoch": 0.41, "learning_rate": 1.7919934449256702e-07, "logits/chosen": -3.663511276245117, "logits/rejected": -3.7316360473632812, "logps/chosen": -140.53834533691406, "logps/rejected": -241.27655029296875, "loss": 0.1211, "rewards/accuracies": 1.0, "rewards/chosen": 0.721221923828125, "rewards/margins": 3.1858344078063965, "rewards/rejected": -2.4646124839782715, "step": 3571 }, { "epoch": 0.41, "learning_rate": 1.7916422802294277e-07, "logits/chosen": -2.329477310180664, "logits/rejected": -2.4047067165374756, "logps/chosen": -271.47418212890625, "logps/rejected": -278.38818359375, "loss": 0.354, "rewards/accuracies": 1.0, "rewards/chosen": -0.2950696051120758, "rewards/margins": 1.474135160446167, "rewards/rejected": -1.76920485496521, "step": 3572 }, { "epoch": 0.41, "learning_rate": 1.791291115533185e-07, "logits/chosen": -3.5097479820251465, "logits/rejected": -3.0119855403900146, "logps/chosen": -306.4727783203125, "logps/rejected": -197.92445373535156, "loss": 0.2785, "rewards/accuracies": 0.875, "rewards/chosen": -0.28227612376213074, "rewards/margins": 1.8367836475372314, "rewards/rejected": -2.1190598011016846, "step": 3573 }, { "epoch": 0.41, "learning_rate": 1.7909399508369425e-07, "logits/chosen": -3.6271681785583496, "logits/rejected": -3.247648000717163, "logps/chosen": -339.15997314453125, "logps/rejected": -238.498291015625, "loss": 0.676, "rewards/accuracies": 0.5, "rewards/chosen": -1.0342439413070679, "rewards/margins": 0.9076406359672546, "rewards/rejected": -1.9418846368789673, "step": 3574 }, { "epoch": 0.41, "learning_rate": 1.7905887861406998e-07, "logits/chosen": -2.9825615882873535, "logits/rejected": -3.0818192958831787, "logps/chosen": -412.22479248046875, "logps/rejected": -321.6934509277344, "loss": 0.8278, "rewards/accuracies": 0.375, "rewards/chosen": -0.9805960059165955, "rewards/margins": 0.8124275207519531, "rewards/rejected": -1.7930233478546143, "step": 3575 }, { "epoch": 0.41, "learning_rate": 1.7902376214444573e-07, "logits/chosen": -3.1296074390411377, "logits/rejected": -3.39495587348938, "logps/chosen": -184.06619262695312, "logps/rejected": -319.42950439453125, "loss": 0.3087, "rewards/accuracies": 0.875, "rewards/chosen": -0.003612454980611801, "rewards/margins": 3.005953311920166, "rewards/rejected": -3.009565591812134, "step": 3576 }, { "epoch": 0.41, "learning_rate": 1.7898864567482149e-07, "logits/chosen": -2.3476271629333496, "logits/rejected": -2.5994365215301514, "logps/chosen": -227.3824462890625, "logps/rejected": -255.1779327392578, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -0.06441403925418854, "rewards/margins": 2.780094861984253, "rewards/rejected": -2.844508647918701, "step": 3577 }, { "epoch": 0.41, "learning_rate": 1.789535292051972e-07, "logits/chosen": -2.42329740524292, "logits/rejected": -2.829495668411255, "logps/chosen": -282.9150390625, "logps/rejected": -226.6349334716797, "loss": 0.5613, "rewards/accuracies": 0.75, "rewards/chosen": -0.0601639449596405, "rewards/margins": 1.396545171737671, "rewards/rejected": -1.4567091464996338, "step": 3578 }, { "epoch": 0.41, "learning_rate": 1.7891841273557297e-07, "logits/chosen": -3.555476427078247, "logits/rejected": -3.6050782203674316, "logps/chosen": -251.6819610595703, "logps/rejected": -362.14373779296875, "loss": 0.3381, "rewards/accuracies": 0.75, "rewards/chosen": -0.46539461612701416, "rewards/margins": 2.855320453643799, "rewards/rejected": -3.3207151889801025, "step": 3579 }, { "epoch": 0.41, "learning_rate": 1.7888329626594875e-07, "logits/chosen": -3.1268386840820312, "logits/rejected": -2.943819284439087, "logps/chosen": -390.14190673828125, "logps/rejected": -163.991455078125, "loss": 0.5289, "rewards/accuracies": 0.75, "rewards/chosen": -0.49210745096206665, "rewards/margins": 0.7762027978897095, "rewards/rejected": -1.2683101892471313, "step": 3580 }, { "epoch": 0.41, "learning_rate": 1.7884817979632445e-07, "logits/chosen": -3.3787033557891846, "logits/rejected": -3.035946846008301, "logps/chosen": -146.97305297851562, "logps/rejected": -202.9108123779297, "loss": 0.5315, "rewards/accuracies": 0.5, "rewards/chosen": 0.06655339151620865, "rewards/margins": 1.419156789779663, "rewards/rejected": -1.3526034355163574, "step": 3581 }, { "epoch": 0.41, "learning_rate": 1.7881306332670023e-07, "logits/chosen": -2.947458267211914, "logits/rejected": -2.6708731651306152, "logps/chosen": -304.565673828125, "logps/rejected": -176.69415283203125, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -0.3028818964958191, "rewards/margins": 1.72908616065979, "rewards/rejected": -2.031968116760254, "step": 3582 }, { "epoch": 0.41, "learning_rate": 1.7877794685707596e-07, "logits/chosen": -2.65566349029541, "logits/rejected": -2.5557210445404053, "logps/chosen": -431.9498291015625, "logps/rejected": -304.64019775390625, "loss": 0.4772, "rewards/accuracies": 0.875, "rewards/chosen": -0.41195517778396606, "rewards/margins": 1.254706859588623, "rewards/rejected": -1.6666619777679443, "step": 3583 }, { "epoch": 0.41, "learning_rate": 1.787428303874517e-07, "logits/chosen": -3.0637857913970947, "logits/rejected": -3.0384857654571533, "logps/chosen": -167.08755493164062, "logps/rejected": -156.80093383789062, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.5385434627532959, "rewards/margins": 0.9841570258140564, "rewards/rejected": -1.522700548171997, "step": 3584 }, { "epoch": 0.41, "learning_rate": 1.7870771391782746e-07, "logits/chosen": -3.107322931289673, "logits/rejected": -2.8449246883392334, "logps/chosen": -361.24346923828125, "logps/rejected": -272.8359375, "loss": 0.6084, "rewards/accuracies": 0.75, "rewards/chosen": 0.02595856785774231, "rewards/margins": 0.957190990447998, "rewards/rejected": -0.9312323927879333, "step": 3585 }, { "epoch": 0.41, "learning_rate": 1.786725974482032e-07, "logits/chosen": -3.466798782348633, "logits/rejected": -3.1818337440490723, "logps/chosen": -299.8273010253906, "logps/rejected": -281.3395080566406, "loss": 0.7945, "rewards/accuracies": 0.625, "rewards/chosen": -0.746163547039032, "rewards/margins": 1.2158188819885254, "rewards/rejected": -1.9619824886322021, "step": 3586 }, { "epoch": 0.41, "learning_rate": 1.7863748097857894e-07, "logits/chosen": -2.857988119125366, "logits/rejected": -3.1148555278778076, "logps/chosen": -192.47434997558594, "logps/rejected": -215.66024780273438, "loss": 0.2889, "rewards/accuracies": 0.875, "rewards/chosen": -0.16386285424232483, "rewards/margins": 2.3750767707824707, "rewards/rejected": -2.5389397144317627, "step": 3587 }, { "epoch": 0.41, "learning_rate": 1.786023645089547e-07, "logits/chosen": -2.8350830078125, "logits/rejected": -3.085730791091919, "logps/chosen": -332.0268249511719, "logps/rejected": -253.9791717529297, "loss": 0.2441, "rewards/accuracies": 0.875, "rewards/chosen": -0.08461606502532959, "rewards/margins": 3.1047372817993164, "rewards/rejected": -3.1893532276153564, "step": 3588 }, { "epoch": 0.41, "learning_rate": 1.7856724803933043e-07, "logits/chosen": -3.738708257675171, "logits/rejected": -3.707634687423706, "logps/chosen": -347.7174072265625, "logps/rejected": -179.05740356445312, "loss": 0.5806, "rewards/accuracies": 0.75, "rewards/chosen": 0.038574814796447754, "rewards/margins": 1.0742621421813965, "rewards/rejected": -1.0356874465942383, "step": 3589 }, { "epoch": 0.41, "learning_rate": 1.7853213156970618e-07, "logits/chosen": -2.3237740993499756, "logits/rejected": -2.5790634155273438, "logps/chosen": -445.1953430175781, "logps/rejected": -381.7630615234375, "loss": 0.3253, "rewards/accuracies": 0.875, "rewards/chosen": 0.5266571044921875, "rewards/margins": 1.8418421745300293, "rewards/rejected": -1.3151849508285522, "step": 3590 }, { "epoch": 0.41, "learning_rate": 1.784970151000819e-07, "logits/chosen": -2.8597424030303955, "logits/rejected": -2.7405660152435303, "logps/chosen": -153.71580505371094, "logps/rejected": -251.53204345703125, "loss": 0.2879, "rewards/accuracies": 0.875, "rewards/chosen": 0.13036289811134338, "rewards/margins": 1.8805550336837769, "rewards/rejected": -1.7501921653747559, "step": 3591 }, { "epoch": 0.41, "learning_rate": 1.784618986304577e-07, "logits/chosen": -2.4824888706207275, "logits/rejected": -2.765441656112671, "logps/chosen": -331.6485595703125, "logps/rejected": -375.02642822265625, "loss": 0.5276, "rewards/accuracies": 0.75, "rewards/chosen": -0.0673438012599945, "rewards/margins": 1.6951868534088135, "rewards/rejected": -1.76253080368042, "step": 3592 }, { "epoch": 0.41, "learning_rate": 1.7842678216083344e-07, "logits/chosen": -2.984872341156006, "logits/rejected": -3.0500099658966064, "logps/chosen": -437.6054382324219, "logps/rejected": -493.95220947265625, "loss": 0.497, "rewards/accuracies": 0.625, "rewards/chosen": -0.4914587140083313, "rewards/margins": 0.8893882632255554, "rewards/rejected": -1.3808469772338867, "step": 3593 }, { "epoch": 0.41, "learning_rate": 1.7839166569120917e-07, "logits/chosen": -3.6472840309143066, "logits/rejected": -3.6511573791503906, "logps/chosen": -247.15640258789062, "logps/rejected": -214.354736328125, "loss": 0.494, "rewards/accuracies": 0.75, "rewards/chosen": -0.37301820516586304, "rewards/margins": 1.428259015083313, "rewards/rejected": -1.8012771606445312, "step": 3594 }, { "epoch": 0.41, "learning_rate": 1.7835654922158492e-07, "logits/chosen": -3.2311148643493652, "logits/rejected": -3.4611332416534424, "logps/chosen": -244.40472412109375, "logps/rejected": -243.2731475830078, "loss": 0.4239, "rewards/accuracies": 0.875, "rewards/chosen": 0.3222489058971405, "rewards/margins": 1.739262580871582, "rewards/rejected": -1.4170136451721191, "step": 3595 }, { "epoch": 0.41, "learning_rate": 1.7832143275196068e-07, "logits/chosen": -3.2573282718658447, "logits/rejected": -3.61942982673645, "logps/chosen": -313.6560974121094, "logps/rejected": -289.79425048828125, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": 0.12074257433414459, "rewards/margins": 1.6523836851119995, "rewards/rejected": -1.5316412448883057, "step": 3596 }, { "epoch": 0.41, "learning_rate": 1.782863162823364e-07, "logits/chosen": -3.2022879123687744, "logits/rejected": -3.108163356781006, "logps/chosen": -221.5350799560547, "logps/rejected": -230.35195922851562, "loss": 0.4047, "rewards/accuracies": 0.875, "rewards/chosen": 0.21291349828243256, "rewards/margins": 1.4624825716018677, "rewards/rejected": -1.249569058418274, "step": 3597 }, { "epoch": 0.41, "learning_rate": 1.7825119981271216e-07, "logits/chosen": -3.629181385040283, "logits/rejected": -3.476834774017334, "logps/chosen": -208.03732299804688, "logps/rejected": -194.42080688476562, "loss": 0.3724, "rewards/accuracies": 0.75, "rewards/chosen": 0.11159797012805939, "rewards/margins": 1.0596052408218384, "rewards/rejected": -0.9480072855949402, "step": 3598 }, { "epoch": 0.41, "learning_rate": 1.7821608334308789e-07, "logits/chosen": -3.6371102333068848, "logits/rejected": -3.6974854469299316, "logps/chosen": -247.7490997314453, "logps/rejected": -332.53143310546875, "loss": 0.2142, "rewards/accuracies": 0.875, "rewards/chosen": 0.6866492629051208, "rewards/margins": 3.1044065952301025, "rewards/rejected": -2.417757034301758, "step": 3599 }, { "epoch": 0.42, "learning_rate": 1.7818096687346364e-07, "logits/chosen": -2.1971964836120605, "logits/rejected": -2.1297290325164795, "logps/chosen": -332.1491394042969, "logps/rejected": -222.23915100097656, "loss": 0.2869, "rewards/accuracies": 1.0, "rewards/chosen": 0.015263639390468597, "rewards/margins": 1.3945873975753784, "rewards/rejected": -1.3793237209320068, "step": 3600 }, { "epoch": 0.42, "learning_rate": 1.781458504038394e-07, "logits/chosen": -3.243313789367676, "logits/rejected": -2.9705090522766113, "logps/chosen": -244.26766967773438, "logps/rejected": -229.92703247070312, "loss": 0.5011, "rewards/accuracies": 0.625, "rewards/chosen": -0.3790815472602844, "rewards/margins": 0.8491986393928528, "rewards/rejected": -1.2282801866531372, "step": 3601 }, { "epoch": 0.42, "learning_rate": 1.7811073393421512e-07, "logits/chosen": -3.493864059448242, "logits/rejected": -3.3653013706207275, "logps/chosen": -254.73472595214844, "logps/rejected": -272.8605041503906, "loss": 0.3372, "rewards/accuracies": 0.875, "rewards/chosen": 0.37540706992149353, "rewards/margins": 1.1201491355895996, "rewards/rejected": -0.7447421550750732, "step": 3602 }, { "epoch": 0.42, "learning_rate": 1.780756174645909e-07, "logits/chosen": -2.4711759090423584, "logits/rejected": -2.2874808311462402, "logps/chosen": -136.7311553955078, "logps/rejected": -369.6774597167969, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": 0.2830653786659241, "rewards/margins": 2.8032593727111816, "rewards/rejected": -2.5201942920684814, "step": 3603 }, { "epoch": 0.42, "learning_rate": 1.7804050099496665e-07, "logits/chosen": -3.1284470558166504, "logits/rejected": -3.427149772644043, "logps/chosen": -165.9254608154297, "logps/rejected": -252.3822021484375, "loss": 0.2473, "rewards/accuracies": 0.875, "rewards/chosen": -0.23782917857170105, "rewards/margins": 2.6813595294952393, "rewards/rejected": -2.919188976287842, "step": 3604 }, { "epoch": 0.42, "learning_rate": 1.7800538452534238e-07, "logits/chosen": -3.4391682147979736, "logits/rejected": -3.4757349491119385, "logps/chosen": -303.8683776855469, "logps/rejected": -316.22369384765625, "loss": 0.3487, "rewards/accuracies": 0.875, "rewards/chosen": -0.11527447402477264, "rewards/margins": 1.7317123413085938, "rewards/rejected": -1.8469866514205933, "step": 3605 }, { "epoch": 0.42, "learning_rate": 1.7797026805571814e-07, "logits/chosen": -3.324484348297119, "logits/rejected": -3.269904613494873, "logps/chosen": -275.097900390625, "logps/rejected": -203.29835510253906, "loss": 0.3669, "rewards/accuracies": 0.875, "rewards/chosen": 0.013506542891263962, "rewards/margins": 2.093096971511841, "rewards/rejected": -2.0795905590057373, "step": 3606 }, { "epoch": 0.42, "learning_rate": 1.7793515158609386e-07, "logits/chosen": -2.802933692932129, "logits/rejected": -2.6279678344726562, "logps/chosen": -127.04118347167969, "logps/rejected": -227.58680725097656, "loss": 0.4356, "rewards/accuracies": 0.625, "rewards/chosen": 0.07712316513061523, "rewards/margins": 1.8498387336730957, "rewards/rejected": -1.7727155685424805, "step": 3607 }, { "epoch": 0.42, "learning_rate": 1.7790003511646962e-07, "logits/chosen": -3.744924545288086, "logits/rejected": -3.696375846862793, "logps/chosen": -202.2759246826172, "logps/rejected": -252.5869140625, "loss": 0.1905, "rewards/accuracies": 0.875, "rewards/chosen": 0.4990552067756653, "rewards/margins": 3.599550724029541, "rewards/rejected": -3.1004955768585205, "step": 3608 }, { "epoch": 0.42, "learning_rate": 1.7786491864684537e-07, "logits/chosen": -3.4189586639404297, "logits/rejected": -3.3847310543060303, "logps/chosen": -283.3072204589844, "logps/rejected": -291.08740234375, "loss": 0.5317, "rewards/accuracies": 0.625, "rewards/chosen": 0.23952540755271912, "rewards/margins": 1.0005695819854736, "rewards/rejected": -0.7610442042350769, "step": 3609 }, { "epoch": 0.42, "learning_rate": 1.778298021772211e-07, "logits/chosen": -3.8750076293945312, "logits/rejected": -4.246096134185791, "logps/chosen": -143.98519897460938, "logps/rejected": -244.53274536132812, "loss": 0.3754, "rewards/accuracies": 0.75, "rewards/chosen": -0.4130411446094513, "rewards/margins": 1.1894339323043823, "rewards/rejected": -1.6024751663208008, "step": 3610 }, { "epoch": 0.42, "learning_rate": 1.7779468570759685e-07, "logits/chosen": -2.4477384090423584, "logits/rejected": -2.4438869953155518, "logps/chosen": -179.17491149902344, "logps/rejected": -186.32838439941406, "loss": 0.4397, "rewards/accuracies": 0.875, "rewards/chosen": 0.05304726958274841, "rewards/margins": 1.2747142314910889, "rewards/rejected": -1.2216670513153076, "step": 3611 }, { "epoch": 0.42, "learning_rate": 1.777595692379726e-07, "logits/chosen": -3.761974811553955, "logits/rejected": -3.787919521331787, "logps/chosen": -272.86517333984375, "logps/rejected": -204.9129180908203, "loss": 0.2827, "rewards/accuracies": 0.875, "rewards/chosen": 0.03332391381263733, "rewards/margins": 2.135557174682617, "rewards/rejected": -2.1022331714630127, "step": 3612 }, { "epoch": 0.42, "learning_rate": 1.7772445276834833e-07, "logits/chosen": -3.0092782974243164, "logits/rejected": -2.890505313873291, "logps/chosen": -261.05487060546875, "logps/rejected": -240.9141845703125, "loss": 0.3473, "rewards/accuracies": 0.875, "rewards/chosen": -0.17746275663375854, "rewards/margins": 1.3008074760437012, "rewards/rejected": -1.4782702922821045, "step": 3613 }, { "epoch": 0.42, "learning_rate": 1.7768933629872411e-07, "logits/chosen": -2.469113826751709, "logits/rejected": -2.7553484439849854, "logps/chosen": -164.67059326171875, "logps/rejected": -298.8160400390625, "loss": 0.2287, "rewards/accuracies": 0.875, "rewards/chosen": 0.06380872428417206, "rewards/margins": 2.438086986541748, "rewards/rejected": -2.3742780685424805, "step": 3614 }, { "epoch": 0.42, "learning_rate": 1.7765421982909981e-07, "logits/chosen": -3.7470250129699707, "logits/rejected": -3.323604106903076, "logps/chosen": -335.26123046875, "logps/rejected": -183.23675537109375, "loss": 0.1695, "rewards/accuracies": 1.0, "rewards/chosen": 0.7733514904975891, "rewards/margins": 2.5403060913085938, "rewards/rejected": -1.7669545412063599, "step": 3615 }, { "epoch": 0.42, "learning_rate": 1.776191033594756e-07, "logits/chosen": -3.0998876094818115, "logits/rejected": -3.125283718109131, "logps/chosen": -219.47897338867188, "logps/rejected": -226.8091583251953, "loss": 0.165, "rewards/accuracies": 1.0, "rewards/chosen": 0.32330378890037537, "rewards/margins": 2.430572509765625, "rewards/rejected": -2.107268810272217, "step": 3616 }, { "epoch": 0.42, "learning_rate": 1.7758398688985135e-07, "logits/chosen": -3.2517032623291016, "logits/rejected": -2.769226551055908, "logps/chosen": -408.4369201660156, "logps/rejected": -312.8028259277344, "loss": 0.3757, "rewards/accuracies": 0.75, "rewards/chosen": -0.3774459660053253, "rewards/margins": 1.129897117614746, "rewards/rejected": -1.507343053817749, "step": 3617 }, { "epoch": 0.42, "learning_rate": 1.7754887042022708e-07, "logits/chosen": -2.463773488998413, "logits/rejected": -2.4494080543518066, "logps/chosen": -453.77117919921875, "logps/rejected": -403.610107421875, "loss": 0.3074, "rewards/accuracies": 0.875, "rewards/chosen": 0.16539503633975983, "rewards/margins": 2.214940309524536, "rewards/rejected": -2.0495450496673584, "step": 3618 }, { "epoch": 0.42, "learning_rate": 1.7751375395060283e-07, "logits/chosen": -3.2914929389953613, "logits/rejected": -3.1912124156951904, "logps/chosen": -322.12652587890625, "logps/rejected": -205.80499267578125, "loss": 0.4387, "rewards/accuracies": 0.75, "rewards/chosen": 0.19578562676906586, "rewards/margins": 1.6542820930480957, "rewards/rejected": -1.4584966897964478, "step": 3619 }, { "epoch": 0.42, "learning_rate": 1.7747863748097856e-07, "logits/chosen": -3.265730619430542, "logits/rejected": -2.859041929244995, "logps/chosen": -283.0668640136719, "logps/rejected": -135.1713409423828, "loss": 0.5815, "rewards/accuracies": 0.5, "rewards/chosen": -0.22154748439788818, "rewards/margins": 1.3742036819458008, "rewards/rejected": -1.5957510471343994, "step": 3620 }, { "epoch": 0.42, "learning_rate": 1.774435210113543e-07, "logits/chosen": -3.135930299758911, "logits/rejected": -3.2024154663085938, "logps/chosen": -235.9925994873047, "logps/rejected": -241.52487182617188, "loss": 0.3178, "rewards/accuracies": 1.0, "rewards/chosen": 0.02126297727227211, "rewards/margins": 1.5163897275924683, "rewards/rejected": -1.495126724243164, "step": 3621 }, { "epoch": 0.42, "learning_rate": 1.7740840454173007e-07, "logits/chosen": -2.8377175331115723, "logits/rejected": -2.809696674346924, "logps/chosen": -174.07395935058594, "logps/rejected": -127.28224182128906, "loss": 0.6494, "rewards/accuracies": 0.75, "rewards/chosen": -0.0016875974833965302, "rewards/margins": 0.5058482885360718, "rewards/rejected": -0.5075358152389526, "step": 3622 }, { "epoch": 0.42, "learning_rate": 1.773732880721058e-07, "logits/chosen": -3.0956342220306396, "logits/rejected": -2.9850006103515625, "logps/chosen": -287.33197021484375, "logps/rejected": -269.5440673828125, "loss": 0.3462, "rewards/accuracies": 0.75, "rewards/chosen": 0.22540880739688873, "rewards/margins": 1.6704620122909546, "rewards/rejected": -1.4450531005859375, "step": 3623 }, { "epoch": 0.42, "learning_rate": 1.7733817160248155e-07, "logits/chosen": -2.9730210304260254, "logits/rejected": -2.8924403190612793, "logps/chosen": -185.6951904296875, "logps/rejected": -281.78009033203125, "loss": 0.669, "rewards/accuracies": 0.375, "rewards/chosen": -0.1997338831424713, "rewards/margins": 0.3705267012119293, "rewards/rejected": -0.5702605843544006, "step": 3624 }, { "epoch": 0.42, "learning_rate": 1.7730305513285733e-07, "logits/chosen": -3.105058431625366, "logits/rejected": -2.7510719299316406, "logps/chosen": -169.9664764404297, "logps/rejected": -163.7920379638672, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.5565968751907349, "rewards/margins": 0.8129980564117432, "rewards/rejected": -1.369594931602478, "step": 3625 }, { "epoch": 0.42, "learning_rate": 1.7726793866323305e-07, "logits/chosen": -3.0943500995635986, "logits/rejected": -3.062509536743164, "logps/chosen": -350.2023010253906, "logps/rejected": -318.0776062011719, "loss": 0.4032, "rewards/accuracies": 0.75, "rewards/chosen": 0.3525112271308899, "rewards/margins": 1.302563190460205, "rewards/rejected": -0.95005202293396, "step": 3626 }, { "epoch": 0.42, "learning_rate": 1.772328221936088e-07, "logits/chosen": -3.3769869804382324, "logits/rejected": -3.070962905883789, "logps/chosen": -308.0557861328125, "logps/rejected": -298.95318603515625, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": -0.2304621934890747, "rewards/margins": 2.0559091567993164, "rewards/rejected": -2.2863712310791016, "step": 3627 }, { "epoch": 0.42, "learning_rate": 1.7719770572398454e-07, "logits/chosen": -3.4546990394592285, "logits/rejected": -3.5186803340911865, "logps/chosen": -246.6385040283203, "logps/rejected": -301.3353271484375, "loss": 0.2091, "rewards/accuracies": 0.875, "rewards/chosen": 0.43586647510528564, "rewards/margins": 2.5885744094848633, "rewards/rejected": -2.152707815170288, "step": 3628 }, { "epoch": 0.42, "learning_rate": 1.771625892543603e-07, "logits/chosen": -3.2144250869750977, "logits/rejected": -3.158358573913574, "logps/chosen": -132.92147827148438, "logps/rejected": -206.49127197265625, "loss": 0.4905, "rewards/accuracies": 0.75, "rewards/chosen": 0.06636463105678558, "rewards/margins": 0.9211452007293701, "rewards/rejected": -0.8547805547714233, "step": 3629 }, { "epoch": 0.42, "learning_rate": 1.7712747278473604e-07, "logits/chosen": -3.1416573524475098, "logits/rejected": -3.600766897201538, "logps/chosen": -210.75302124023438, "logps/rejected": -163.16444396972656, "loss": 0.3516, "rewards/accuracies": 0.875, "rewards/chosen": 0.08937221765518188, "rewards/margins": 1.8864513635635376, "rewards/rejected": -1.797079086303711, "step": 3630 }, { "epoch": 0.42, "learning_rate": 1.7709235631511177e-07, "logits/chosen": -2.612319231033325, "logits/rejected": -2.4941697120666504, "logps/chosen": -374.9217529296875, "logps/rejected": -263.1956787109375, "loss": 0.5677, "rewards/accuracies": 0.75, "rewards/chosen": -0.15920697152614594, "rewards/margins": 0.7866968512535095, "rewards/rejected": -0.9459038376808167, "step": 3631 }, { "epoch": 0.42, "learning_rate": 1.7705723984548752e-07, "logits/chosen": -3.0926594734191895, "logits/rejected": -3.273958206176758, "logps/chosen": -258.765380859375, "logps/rejected": -291.8808898925781, "loss": 0.4476, "rewards/accuracies": 0.75, "rewards/chosen": 0.34579548239707947, "rewards/margins": 1.5180823802947998, "rewards/rejected": -1.172287106513977, "step": 3632 }, { "epoch": 0.42, "learning_rate": 1.7702212337586328e-07, "logits/chosen": -2.363273859024048, "logits/rejected": -2.3719887733459473, "logps/chosen": -400.58843994140625, "logps/rejected": -380.9510498046875, "loss": 0.2131, "rewards/accuracies": 0.875, "rewards/chosen": -0.010603904724121094, "rewards/margins": 1.9839978218078613, "rewards/rejected": -1.9946014881134033, "step": 3633 }, { "epoch": 0.42, "learning_rate": 1.76987006906239e-07, "logits/chosen": -3.0332791805267334, "logits/rejected": -3.0523829460144043, "logps/chosen": -170.6876983642578, "logps/rejected": -393.2889404296875, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": 0.490287184715271, "rewards/margins": 3.7241032123565674, "rewards/rejected": -3.233815908432007, "step": 3634 }, { "epoch": 0.42, "learning_rate": 1.7695189043661476e-07, "logits/chosen": -2.860816478729248, "logits/rejected": -2.9145326614379883, "logps/chosen": -201.06004333496094, "logps/rejected": -176.80810546875, "loss": 0.2935, "rewards/accuracies": 1.0, "rewards/chosen": -0.23176246881484985, "rewards/margins": 1.7282990217208862, "rewards/rejected": -1.9600615501403809, "step": 3635 }, { "epoch": 0.42, "learning_rate": 1.769167739669905e-07, "logits/chosen": -2.9824328422546387, "logits/rejected": -2.9698352813720703, "logps/chosen": -298.8978576660156, "logps/rejected": -255.1248016357422, "loss": 0.457, "rewards/accuracies": 0.875, "rewards/chosen": 0.47917526960372925, "rewards/margins": 0.95234215259552, "rewards/rejected": -0.473166823387146, "step": 3636 }, { "epoch": 0.42, "learning_rate": 1.7688165749736627e-07, "logits/chosen": -2.799117088317871, "logits/rejected": -2.8473410606384277, "logps/chosen": -339.62542724609375, "logps/rejected": -250.97140502929688, "loss": 0.3787, "rewards/accuracies": 0.75, "rewards/chosen": 0.03334202617406845, "rewards/margins": 1.5134683847427368, "rewards/rejected": -1.4801263809204102, "step": 3637 }, { "epoch": 0.42, "learning_rate": 1.7684654102774202e-07, "logits/chosen": -2.3478775024414062, "logits/rejected": -2.2873880863189697, "logps/chosen": -362.37628173828125, "logps/rejected": -432.9797058105469, "loss": 0.3857, "rewards/accuracies": 0.75, "rewards/chosen": 0.20124129951000214, "rewards/margins": 1.730050802230835, "rewards/rejected": -1.5288094282150269, "step": 3638 }, { "epoch": 0.42, "learning_rate": 1.7681142455811775e-07, "logits/chosen": -3.220346689224243, "logits/rejected": -2.5427706241607666, "logps/chosen": -358.1689758300781, "logps/rejected": -267.6650390625, "loss": 0.3799, "rewards/accuracies": 0.625, "rewards/chosen": -0.16996780037879944, "rewards/margins": 1.4173362255096436, "rewards/rejected": -1.5873042345046997, "step": 3639 }, { "epoch": 0.42, "learning_rate": 1.767763080884935e-07, "logits/chosen": -2.6814520359039307, "logits/rejected": -2.8254384994506836, "logps/chosen": -302.58636474609375, "logps/rejected": -260.7044677734375, "loss": 0.4382, "rewards/accuracies": 0.75, "rewards/chosen": 0.1532062143087387, "rewards/margins": 1.005242943763733, "rewards/rejected": -0.852036714553833, "step": 3640 }, { "epoch": 0.42, "learning_rate": 1.7674119161886926e-07, "logits/chosen": -2.943485736846924, "logits/rejected": -3.073014259338379, "logps/chosen": -281.6152648925781, "logps/rejected": -239.97113037109375, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": 0.011068761348724365, "rewards/margins": 1.0024497509002686, "rewards/rejected": -0.9913809299468994, "step": 3641 }, { "epoch": 0.42, "learning_rate": 1.7670607514924498e-07, "logits/chosen": -2.772709846496582, "logits/rejected": -2.927123546600342, "logps/chosen": -245.40646362304688, "logps/rejected": -274.51153564453125, "loss": 0.7098, "rewards/accuracies": 0.625, "rewards/chosen": -0.76863694190979, "rewards/margins": 0.78693687915802, "rewards/rejected": -1.55557382106781, "step": 3642 }, { "epoch": 0.42, "learning_rate": 1.7667095867962074e-07, "logits/chosen": -3.081103563308716, "logits/rejected": -2.770125389099121, "logps/chosen": -268.05950927734375, "logps/rejected": -306.35455322265625, "loss": 0.2429, "rewards/accuracies": 0.875, "rewards/chosen": 0.5047484040260315, "rewards/margins": 1.88785719871521, "rewards/rejected": -1.3831087350845337, "step": 3643 }, { "epoch": 0.42, "learning_rate": 1.7663584220999646e-07, "logits/chosen": -3.140497922897339, "logits/rejected": -2.9025254249572754, "logps/chosen": -274.5437927246094, "logps/rejected": -251.7563018798828, "loss": 0.4712, "rewards/accuracies": 0.875, "rewards/chosen": -0.753960132598877, "rewards/margins": 1.5107910633087158, "rewards/rejected": -2.264751434326172, "step": 3644 }, { "epoch": 0.42, "learning_rate": 1.7660072574037222e-07, "logits/chosen": -2.6147029399871826, "logits/rejected": -3.006087303161621, "logps/chosen": -215.32363891601562, "logps/rejected": -314.00701904296875, "loss": 0.3476, "rewards/accuracies": 0.875, "rewards/chosen": -0.4955264925956726, "rewards/margins": 1.5818054676055908, "rewards/rejected": -2.077331781387329, "step": 3645 }, { "epoch": 0.42, "learning_rate": 1.7656560927074797e-07, "logits/chosen": -2.649563789367676, "logits/rejected": -2.567958354949951, "logps/chosen": -188.53775024414062, "logps/rejected": -249.00657653808594, "loss": 0.546, "rewards/accuracies": 0.625, "rewards/chosen": -0.0882013812661171, "rewards/margins": 1.824887752532959, "rewards/rejected": -1.9130892753601074, "step": 3646 }, { "epoch": 0.42, "learning_rate": 1.765304928011237e-07, "logits/chosen": -2.613126754760742, "logits/rejected": -2.917285203933716, "logps/chosen": -287.8216247558594, "logps/rejected": -122.5135726928711, "loss": 0.3272, "rewards/accuracies": 1.0, "rewards/chosen": 0.12070082873106003, "rewards/margins": 1.2106047868728638, "rewards/rejected": -1.0899039506912231, "step": 3647 }, { "epoch": 0.42, "learning_rate": 1.7649537633149948e-07, "logits/chosen": -3.3094234466552734, "logits/rejected": -3.559028148651123, "logps/chosen": -151.65423583984375, "logps/rejected": -204.50338745117188, "loss": 0.4381, "rewards/accuracies": 0.75, "rewards/chosen": -0.2810271978378296, "rewards/margins": 2.122706890106201, "rewards/rejected": -2.403733968734741, "step": 3648 }, { "epoch": 0.42, "learning_rate": 1.7646025986187523e-07, "logits/chosen": -3.695383071899414, "logits/rejected": -3.711465835571289, "logps/chosen": -185.64743041992188, "logps/rejected": -198.56048583984375, "loss": 0.3539, "rewards/accuracies": 0.875, "rewards/chosen": -0.03897445276379585, "rewards/margins": 1.7048319578170776, "rewards/rejected": -1.7438063621520996, "step": 3649 }, { "epoch": 0.42, "learning_rate": 1.7642514339225096e-07, "logits/chosen": -2.4696390628814697, "logits/rejected": -2.4563019275665283, "logps/chosen": -312.1109924316406, "logps/rejected": -380.2071533203125, "loss": 0.2734, "rewards/accuracies": 0.875, "rewards/chosen": 0.014812782406806946, "rewards/margins": 2.9819231033325195, "rewards/rejected": -2.9671101570129395, "step": 3650 }, { "epoch": 0.42, "learning_rate": 1.7639002692262672e-07, "logits/chosen": -3.054286479949951, "logits/rejected": -3.4297707080841064, "logps/chosen": -171.6527557373047, "logps/rejected": -212.00408935546875, "loss": 0.2795, "rewards/accuracies": 0.875, "rewards/chosen": 0.6143932938575745, "rewards/margins": 2.265759229660034, "rewards/rejected": -1.6513659954071045, "step": 3651 }, { "epoch": 0.42, "learning_rate": 1.7635491045300244e-07, "logits/chosen": -3.5356597900390625, "logits/rejected": -3.3827714920043945, "logps/chosen": -237.62335205078125, "logps/rejected": -244.1085205078125, "loss": 0.2377, "rewards/accuracies": 1.0, "rewards/chosen": 0.11496227234601974, "rewards/margins": 2.1504745483398438, "rewards/rejected": -2.0355124473571777, "step": 3652 }, { "epoch": 0.42, "learning_rate": 1.763197939833782e-07, "logits/chosen": -3.4730801582336426, "logits/rejected": -3.5231266021728516, "logps/chosen": -379.6275634765625, "logps/rejected": -360.52691650390625, "loss": 0.2104, "rewards/accuracies": 0.875, "rewards/chosen": -0.2855432331562042, "rewards/margins": 2.7163121700286865, "rewards/rejected": -3.0018553733825684, "step": 3653 }, { "epoch": 0.42, "learning_rate": 1.7628467751375395e-07, "logits/chosen": -2.6805691719055176, "logits/rejected": -2.606480121612549, "logps/chosen": -377.70361328125, "logps/rejected": -220.27452087402344, "loss": 0.4317, "rewards/accuracies": 0.875, "rewards/chosen": 0.15545958280563354, "rewards/margins": 1.2018458843231201, "rewards/rejected": -1.0463862419128418, "step": 3654 }, { "epoch": 0.42, "learning_rate": 1.7624956104412968e-07, "logits/chosen": -2.796295166015625, "logits/rejected": -2.802703619003296, "logps/chosen": -323.9333801269531, "logps/rejected": -396.0824279785156, "loss": 0.5177, "rewards/accuracies": 0.75, "rewards/chosen": 0.30582720041275024, "rewards/margins": 1.525369644165039, "rewards/rejected": -1.219542384147644, "step": 3655 }, { "epoch": 0.42, "learning_rate": 1.7621444457450543e-07, "logits/chosen": -3.074664354324341, "logits/rejected": -2.9063656330108643, "logps/chosen": -134.7894287109375, "logps/rejected": -207.12083435058594, "loss": 0.3646, "rewards/accuracies": 0.75, "rewards/chosen": -0.1659690886735916, "rewards/margins": 3.3255927562713623, "rewards/rejected": -3.4915618896484375, "step": 3656 }, { "epoch": 0.42, "learning_rate": 1.7617932810488119e-07, "logits/chosen": -2.788881540298462, "logits/rejected": -2.715400218963623, "logps/chosen": -319.77001953125, "logps/rejected": -294.37225341796875, "loss": 0.1504, "rewards/accuracies": 1.0, "rewards/chosen": 0.2999189496040344, "rewards/margins": 3.4237289428710938, "rewards/rejected": -3.123810052871704, "step": 3657 }, { "epoch": 0.42, "learning_rate": 1.761442116352569e-07, "logits/chosen": -3.4173128604888916, "logits/rejected": -3.2563412189483643, "logps/chosen": -211.1702880859375, "logps/rejected": -242.68035888671875, "loss": 0.7213, "rewards/accuracies": 0.75, "rewards/chosen": -0.023118890821933746, "rewards/margins": 1.0918171405792236, "rewards/rejected": -1.114936113357544, "step": 3658 }, { "epoch": 0.42, "learning_rate": 1.761090951656327e-07, "logits/chosen": -3.4876203536987305, "logits/rejected": -3.292799949645996, "logps/chosen": -254.85113525390625, "logps/rejected": -185.74607849121094, "loss": 0.3861, "rewards/accuracies": 0.75, "rewards/chosen": -0.2451949566602707, "rewards/margins": 2.270878791809082, "rewards/rejected": -2.516073703765869, "step": 3659 }, { "epoch": 0.42, "learning_rate": 1.760739786960084e-07, "logits/chosen": -3.8485403060913086, "logits/rejected": -3.8267271518707275, "logps/chosen": -155.20123291015625, "logps/rejected": -210.4557342529297, "loss": 0.2287, "rewards/accuracies": 0.875, "rewards/chosen": 0.6365233659744263, "rewards/margins": 2.4134607315063477, "rewards/rejected": -1.776937484741211, "step": 3660 }, { "epoch": 0.42, "learning_rate": 1.7603886222638417e-07, "logits/chosen": -3.7104101181030273, "logits/rejected": -3.495621681213379, "logps/chosen": -278.2177734375, "logps/rejected": -146.71397399902344, "loss": 0.4815, "rewards/accuracies": 0.875, "rewards/chosen": 0.005841255187988281, "rewards/margins": 1.2475409507751465, "rewards/rejected": -1.2416998147964478, "step": 3661 }, { "epoch": 0.42, "learning_rate": 1.7600374575675993e-07, "logits/chosen": -2.770716667175293, "logits/rejected": -2.7188820838928223, "logps/chosen": -270.78045654296875, "logps/rejected": -239.47996520996094, "loss": 0.241, "rewards/accuracies": 0.875, "rewards/chosen": 0.2629327178001404, "rewards/margins": 2.3488526344299316, "rewards/rejected": -2.0859200954437256, "step": 3662 }, { "epoch": 0.42, "learning_rate": 1.7596862928713566e-07, "logits/chosen": -2.9577372074127197, "logits/rejected": -2.7594523429870605, "logps/chosen": -410.2992858886719, "logps/rejected": -333.0937194824219, "loss": 0.7051, "rewards/accuracies": 0.5, "rewards/chosen": -1.0373762845993042, "rewards/margins": 0.46418291330337524, "rewards/rejected": -1.5015592575073242, "step": 3663 }, { "epoch": 0.42, "learning_rate": 1.759335128175114e-07, "logits/chosen": -3.327953577041626, "logits/rejected": -3.3014254570007324, "logps/chosen": -179.184814453125, "logps/rejected": -190.2238006591797, "loss": 0.7397, "rewards/accuracies": 0.625, "rewards/chosen": -0.30971142649650574, "rewards/margins": 0.3783586323261261, "rewards/rejected": -0.6880700588226318, "step": 3664 }, { "epoch": 0.42, "learning_rate": 1.7589839634788714e-07, "logits/chosen": -2.819211006164551, "logits/rejected": -2.8049912452697754, "logps/chosen": -401.12420654296875, "logps/rejected": -256.31427001953125, "loss": 0.2985, "rewards/accuracies": 0.875, "rewards/chosen": 0.17595121264457703, "rewards/margins": 1.51735520362854, "rewards/rejected": -1.3414039611816406, "step": 3665 }, { "epoch": 0.42, "learning_rate": 1.758632798782629e-07, "logits/chosen": -2.505849838256836, "logits/rejected": -2.376697301864624, "logps/chosen": -256.70904541015625, "logps/rejected": -232.02247619628906, "loss": 0.6854, "rewards/accuracies": 0.625, "rewards/chosen": -0.27026069164276123, "rewards/margins": 1.2330822944641113, "rewards/rejected": -1.5033429861068726, "step": 3666 }, { "epoch": 0.42, "learning_rate": 1.7582816340863864e-07, "logits/chosen": -2.8012218475341797, "logits/rejected": -2.8539414405822754, "logps/chosen": -197.89016723632812, "logps/rejected": -280.71466064453125, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": 0.5192413330078125, "rewards/margins": 2.657531499862671, "rewards/rejected": -2.1382904052734375, "step": 3667 }, { "epoch": 0.42, "learning_rate": 1.7579304693901437e-07, "logits/chosen": -2.881971836090088, "logits/rejected": -2.901447296142578, "logps/chosen": -223.26048278808594, "logps/rejected": -323.59002685546875, "loss": 0.2808, "rewards/accuracies": 0.75, "rewards/chosen": 0.4198216497898102, "rewards/margins": 2.851172685623169, "rewards/rejected": -2.4313511848449707, "step": 3668 }, { "epoch": 0.42, "learning_rate": 1.7575793046939013e-07, "logits/chosen": -3.636566400527954, "logits/rejected": -3.425074577331543, "logps/chosen": -346.55755615234375, "logps/rejected": -306.062744140625, "loss": 0.338, "rewards/accuracies": 1.0, "rewards/chosen": 0.015161365270614624, "rewards/margins": 1.249566912651062, "rewards/rejected": -1.234405517578125, "step": 3669 }, { "epoch": 0.42, "learning_rate": 1.757228139997659e-07, "logits/chosen": -3.645534038543701, "logits/rejected": -3.7407495975494385, "logps/chosen": -292.91790771484375, "logps/rejected": -273.178955078125, "loss": 0.2386, "rewards/accuracies": 0.875, "rewards/chosen": 0.07463999092578888, "rewards/margins": 2.6679911613464355, "rewards/rejected": -2.593351125717163, "step": 3670 }, { "epoch": 0.42, "learning_rate": 1.7568769753014163e-07, "logits/chosen": -3.3033218383789062, "logits/rejected": -3.3104379177093506, "logps/chosen": -204.4661865234375, "logps/rejected": -259.4085998535156, "loss": 0.4702, "rewards/accuracies": 0.625, "rewards/chosen": -0.02536982297897339, "rewards/margins": 1.9571795463562012, "rewards/rejected": -1.9825491905212402, "step": 3671 }, { "epoch": 0.42, "learning_rate": 1.756525810605174e-07, "logits/chosen": -3.599915027618408, "logits/rejected": -3.6784982681274414, "logps/chosen": -181.92926025390625, "logps/rejected": -224.1087188720703, "loss": 0.3597, "rewards/accuracies": 0.625, "rewards/chosen": 0.4313427805900574, "rewards/margins": 1.650991439819336, "rewards/rejected": -1.2196487188339233, "step": 3672 }, { "epoch": 0.42, "learning_rate": 1.7561746459089311e-07, "logits/chosen": -2.8521246910095215, "logits/rejected": -3.0531527996063232, "logps/chosen": -332.9010009765625, "logps/rejected": -254.57510375976562, "loss": 0.1776, "rewards/accuracies": 0.875, "rewards/chosen": 0.5503280758857727, "rewards/margins": 3.1421520709991455, "rewards/rejected": -2.5918242931365967, "step": 3673 }, { "epoch": 0.42, "learning_rate": 1.7558234812126887e-07, "logits/chosen": -3.2007575035095215, "logits/rejected": -2.4732701778411865, "logps/chosen": -240.74737548828125, "logps/rejected": -184.19097900390625, "loss": 0.39, "rewards/accuracies": 0.75, "rewards/chosen": 0.4747461676597595, "rewards/margins": 1.3680391311645508, "rewards/rejected": -0.8932929039001465, "step": 3674 }, { "epoch": 0.42, "learning_rate": 1.7554723165164462e-07, "logits/chosen": -2.2716755867004395, "logits/rejected": -2.4005074501037598, "logps/chosen": -103.11361694335938, "logps/rejected": -256.044677734375, "loss": 0.4659, "rewards/accuracies": 0.625, "rewards/chosen": 0.2913406789302826, "rewards/margins": 1.5435810089111328, "rewards/rejected": -1.2522403001785278, "step": 3675 }, { "epoch": 0.42, "learning_rate": 1.7551211518202035e-07, "logits/chosen": -3.160127878189087, "logits/rejected": -3.250457525253296, "logps/chosen": -206.453125, "logps/rejected": -212.25253295898438, "loss": 0.2886, "rewards/accuracies": 1.0, "rewards/chosen": 0.5067204833030701, "rewards/margins": 1.533459186553955, "rewards/rejected": -1.0267386436462402, "step": 3676 }, { "epoch": 0.42, "learning_rate": 1.754769987123961e-07, "logits/chosen": -2.930300235748291, "logits/rejected": -2.809786558151245, "logps/chosen": -340.4488830566406, "logps/rejected": -279.65545654296875, "loss": 0.4697, "rewards/accuracies": 0.75, "rewards/chosen": -0.20617930591106415, "rewards/margins": 2.6692047119140625, "rewards/rejected": -2.8753838539123535, "step": 3677 }, { "epoch": 0.42, "learning_rate": 1.7544188224277186e-07, "logits/chosen": -3.345031261444092, "logits/rejected": -3.3347318172454834, "logps/chosen": -224.3382568359375, "logps/rejected": -147.240234375, "loss": 0.4561, "rewards/accuracies": 0.75, "rewards/chosen": -0.18426811695098877, "rewards/margins": 1.3964011669158936, "rewards/rejected": -1.5806691646575928, "step": 3678 }, { "epoch": 0.42, "learning_rate": 1.7540676577314758e-07, "logits/chosen": -3.3903074264526367, "logits/rejected": -3.34645414352417, "logps/chosen": -248.82781982421875, "logps/rejected": -150.66310119628906, "loss": 0.4146, "rewards/accuracies": 0.75, "rewards/chosen": 0.3776225745677948, "rewards/margins": 1.2259905338287354, "rewards/rejected": -0.8483679890632629, "step": 3679 }, { "epoch": 0.42, "learning_rate": 1.7537164930352334e-07, "logits/chosen": -3.1243927478790283, "logits/rejected": -3.3144679069519043, "logps/chosen": -272.9576416015625, "logps/rejected": -287.1666564941406, "loss": 0.408, "rewards/accuracies": 0.75, "rewards/chosen": -0.03647109866142273, "rewards/margins": 2.2195956707000732, "rewards/rejected": -2.2560667991638184, "step": 3680 }, { "epoch": 0.42, "learning_rate": 1.7533653283389907e-07, "logits/chosen": -3.866467237472534, "logits/rejected": -3.907078981399536, "logps/chosen": -265.8874206542969, "logps/rejected": -274.9924011230469, "loss": 0.4631, "rewards/accuracies": 0.625, "rewards/chosen": -0.1495412290096283, "rewards/margins": 1.4447901248931885, "rewards/rejected": -1.5943312644958496, "step": 3681 }, { "epoch": 0.42, "learning_rate": 1.7530141636427485e-07, "logits/chosen": -3.3129658699035645, "logits/rejected": -3.078537702560425, "logps/chosen": -425.0328063964844, "logps/rejected": -313.2099609375, "loss": 0.4543, "rewards/accuracies": 0.875, "rewards/chosen": 0.009612083435058594, "rewards/margins": 1.6689425706863403, "rewards/rejected": -1.6593303680419922, "step": 3682 }, { "epoch": 0.42, "learning_rate": 1.752662998946506e-07, "logits/chosen": -2.9076428413391113, "logits/rejected": -3.0406863689422607, "logps/chosen": -247.17361450195312, "logps/rejected": -192.90579223632812, "loss": 0.6173, "rewards/accuracies": 0.625, "rewards/chosen": -0.3767296373844147, "rewards/margins": 0.4925684928894043, "rewards/rejected": -0.8692981600761414, "step": 3683 }, { "epoch": 0.42, "learning_rate": 1.7523118342502633e-07, "logits/chosen": -3.044571876525879, "logits/rejected": -2.984180212020874, "logps/chosen": -166.7109832763672, "logps/rejected": -146.12913513183594, "loss": 0.5838, "rewards/accuracies": 0.625, "rewards/chosen": -0.16488619148731232, "rewards/margins": 0.4403136372566223, "rewards/rejected": -0.6051998734474182, "step": 3684 }, { "epoch": 0.42, "learning_rate": 1.7519606695540208e-07, "logits/chosen": -2.985586166381836, "logits/rejected": -3.2165706157684326, "logps/chosen": -448.128173828125, "logps/rejected": -288.26593017578125, "loss": 0.3185, "rewards/accuracies": 0.875, "rewards/chosen": 0.32975998520851135, "rewards/margins": 2.4367318153381348, "rewards/rejected": -2.1069719791412354, "step": 3685 }, { "epoch": 0.42, "learning_rate": 1.7516095048577784e-07, "logits/chosen": -2.56539249420166, "logits/rejected": -2.5182745456695557, "logps/chosen": -253.15289306640625, "logps/rejected": -249.2406005859375, "loss": 0.4847, "rewards/accuracies": 0.75, "rewards/chosen": 0.21470847725868225, "rewards/margins": 1.381178855895996, "rewards/rejected": -1.1664702892303467, "step": 3686 }, { "epoch": 0.43, "learning_rate": 1.7512583401615356e-07, "logits/chosen": -2.7904419898986816, "logits/rejected": -3.1729698181152344, "logps/chosen": -422.5179443359375, "logps/rejected": -338.39886474609375, "loss": 0.6094, "rewards/accuracies": 0.75, "rewards/chosen": -0.5157440900802612, "rewards/margins": 0.5581536293029785, "rewards/rejected": -1.0738977193832397, "step": 3687 }, { "epoch": 0.43, "learning_rate": 1.7509071754652932e-07, "logits/chosen": -2.3732004165649414, "logits/rejected": -2.344940185546875, "logps/chosen": -270.4952697753906, "logps/rejected": -192.2425079345703, "loss": 0.4994, "rewards/accuracies": 0.75, "rewards/chosen": -0.04739580303430557, "rewards/margins": 1.4107962846755981, "rewards/rejected": -1.458191990852356, "step": 3688 }, { "epoch": 0.43, "learning_rate": 1.7505560107690504e-07, "logits/chosen": -3.320840358734131, "logits/rejected": -3.3140125274658203, "logps/chosen": -362.8102722167969, "logps/rejected": -204.6231689453125, "loss": 0.1873, "rewards/accuracies": 1.0, "rewards/chosen": 0.2925833761692047, "rewards/margins": 2.486625909805298, "rewards/rejected": -2.194042682647705, "step": 3689 }, { "epoch": 0.43, "learning_rate": 1.750204846072808e-07, "logits/chosen": -3.2383601665496826, "logits/rejected": -2.9918212890625, "logps/chosen": -397.9549255371094, "logps/rejected": -283.3879699707031, "loss": 0.3625, "rewards/accuracies": 0.875, "rewards/chosen": -0.0777997225522995, "rewards/margins": 1.394925832748413, "rewards/rejected": -1.4727253913879395, "step": 3690 }, { "epoch": 0.43, "learning_rate": 1.7498536813765655e-07, "logits/chosen": -3.4846014976501465, "logits/rejected": -3.4960670471191406, "logps/chosen": -164.9415283203125, "logps/rejected": -234.11642456054688, "loss": 0.4807, "rewards/accuracies": 0.625, "rewards/chosen": 0.02652006968855858, "rewards/margins": 0.8070441484451294, "rewards/rejected": -0.7805240154266357, "step": 3691 }, { "epoch": 0.43, "learning_rate": 1.7495025166803228e-07, "logits/chosen": -2.424030065536499, "logits/rejected": -2.503279447555542, "logps/chosen": -264.1920166015625, "logps/rejected": -212.9855499267578, "loss": 0.7075, "rewards/accuracies": 0.625, "rewards/chosen": -0.173688605427742, "rewards/margins": 0.9786339998245239, "rewards/rejected": -1.15232253074646, "step": 3692 }, { "epoch": 0.43, "learning_rate": 1.7491513519840806e-07, "logits/chosen": -3.946321964263916, "logits/rejected": -3.6730384826660156, "logps/chosen": -331.69842529296875, "logps/rejected": -238.94546508789062, "loss": 0.2948, "rewards/accuracies": 1.0, "rewards/chosen": -0.157293900847435, "rewards/margins": 1.9004795551300049, "rewards/rejected": -2.0577735900878906, "step": 3693 }, { "epoch": 0.43, "learning_rate": 1.7488001872878381e-07, "logits/chosen": -2.9397292137145996, "logits/rejected": -2.7671985626220703, "logps/chosen": -381.0958251953125, "logps/rejected": -293.4032897949219, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": 0.06413798779249191, "rewards/margins": 2.4678945541381836, "rewards/rejected": -2.4037563800811768, "step": 3694 }, { "epoch": 0.43, "learning_rate": 1.7484490225915954e-07, "logits/chosen": -3.505201816558838, "logits/rejected": -3.641650915145874, "logps/chosen": -241.50909423828125, "logps/rejected": -302.693603515625, "loss": 0.4064, "rewards/accuracies": 0.875, "rewards/chosen": 0.4585130214691162, "rewards/margins": 1.4443204402923584, "rewards/rejected": -0.9858075380325317, "step": 3695 }, { "epoch": 0.43, "learning_rate": 1.748097857895353e-07, "logits/chosen": -2.571855068206787, "logits/rejected": -2.744417428970337, "logps/chosen": -199.8043975830078, "logps/rejected": -272.9269714355469, "loss": 0.3304, "rewards/accuracies": 0.75, "rewards/chosen": 0.02228662371635437, "rewards/margins": 2.4847335815429688, "rewards/rejected": -2.462446928024292, "step": 3696 }, { "epoch": 0.43, "learning_rate": 1.7477466931991102e-07, "logits/chosen": -3.1367175579071045, "logits/rejected": -2.911776065826416, "logps/chosen": -268.60748291015625, "logps/rejected": -283.1267395019531, "loss": 0.2648, "rewards/accuracies": 1.0, "rewards/chosen": -0.03149862214922905, "rewards/margins": 2.236211061477661, "rewards/rejected": -2.267709732055664, "step": 3697 }, { "epoch": 0.43, "learning_rate": 1.7473955285028678e-07, "logits/chosen": -3.427600860595703, "logits/rejected": -3.278036594390869, "logps/chosen": -461.87469482421875, "logps/rejected": -329.0977478027344, "loss": 0.5293, "rewards/accuracies": 0.75, "rewards/chosen": -0.7517992258071899, "rewards/margins": 0.8983291387557983, "rewards/rejected": -1.6501283645629883, "step": 3698 }, { "epoch": 0.43, "learning_rate": 1.7470443638066253e-07, "logits/chosen": -2.5508201122283936, "logits/rejected": -2.710571050643921, "logps/chosen": -321.293701171875, "logps/rejected": -209.51194763183594, "loss": 0.2007, "rewards/accuracies": 0.875, "rewards/chosen": 0.024450138211250305, "rewards/margins": 2.032128095626831, "rewards/rejected": -2.0076780319213867, "step": 3699 }, { "epoch": 0.43, "learning_rate": 1.7466931991103826e-07, "logits/chosen": -2.9145636558532715, "logits/rejected": -2.9309887886047363, "logps/chosen": -155.93807983398438, "logps/rejected": -170.2395477294922, "loss": 0.778, "rewards/accuracies": 0.625, "rewards/chosen": -0.5807395577430725, "rewards/margins": 0.1392105221748352, "rewards/rejected": -0.7199500799179077, "step": 3700 }, { "epoch": 0.43, "learning_rate": 1.74634203441414e-07, "logits/chosen": -3.4217257499694824, "logits/rejected": -3.622093915939331, "logps/chosen": -242.79603576660156, "logps/rejected": -180.10264587402344, "loss": 0.7115, "rewards/accuracies": 0.75, "rewards/chosen": -0.5945636630058289, "rewards/margins": 1.9756590127944946, "rewards/rejected": -2.570222854614258, "step": 3701 }, { "epoch": 0.43, "learning_rate": 1.7459908697178976e-07, "logits/chosen": -2.9771952629089355, "logits/rejected": -2.8347296714782715, "logps/chosen": -163.63543701171875, "logps/rejected": -278.3885803222656, "loss": 0.342, "rewards/accuracies": 0.875, "rewards/chosen": -0.31905388832092285, "rewards/margins": 1.1981523036956787, "rewards/rejected": -1.5172061920166016, "step": 3702 }, { "epoch": 0.43, "learning_rate": 1.745639705021655e-07, "logits/chosen": -3.5726640224456787, "logits/rejected": -3.4684059619903564, "logps/chosen": -225.43280029296875, "logps/rejected": -300.43243408203125, "loss": 0.2404, "rewards/accuracies": 0.875, "rewards/chosen": -0.3851601779460907, "rewards/margins": 1.9210405349731445, "rewards/rejected": -2.3062007427215576, "step": 3703 }, { "epoch": 0.43, "learning_rate": 1.7452885403254127e-07, "logits/chosen": -2.531132221221924, "logits/rejected": -2.923954486846924, "logps/chosen": -167.4175262451172, "logps/rejected": -178.97923278808594, "loss": 0.4544, "rewards/accuracies": 0.75, "rewards/chosen": -0.7285107374191284, "rewards/margins": 1.6871981620788574, "rewards/rejected": -2.4157090187072754, "step": 3704 }, { "epoch": 0.43, "learning_rate": 1.74493737562917e-07, "logits/chosen": -3.2709741592407227, "logits/rejected": -3.0662009716033936, "logps/chosen": -217.2175750732422, "logps/rejected": -257.073486328125, "loss": 0.7644, "rewards/accuracies": 0.625, "rewards/chosen": -0.3162473142147064, "rewards/margins": 1.2224907875061035, "rewards/rejected": -1.5387380123138428, "step": 3705 }, { "epoch": 0.43, "learning_rate": 1.7445862109329275e-07, "logits/chosen": -2.809107780456543, "logits/rejected": -2.913557291030884, "logps/chosen": -216.73199462890625, "logps/rejected": -183.3575439453125, "loss": 0.4056, "rewards/accuracies": 0.875, "rewards/chosen": -0.06992653757333755, "rewards/margins": 1.582355260848999, "rewards/rejected": -1.6522817611694336, "step": 3706 }, { "epoch": 0.43, "learning_rate": 1.744235046236685e-07, "logits/chosen": -2.3153061866760254, "logits/rejected": -1.959198236465454, "logps/chosen": -483.89569091796875, "logps/rejected": -260.3747863769531, "loss": 0.5165, "rewards/accuracies": 0.5, "rewards/chosen": -0.7013405561447144, "rewards/margins": 1.2353366613388062, "rewards/rejected": -1.9366772174835205, "step": 3707 }, { "epoch": 0.43, "learning_rate": 1.7438838815404423e-07, "logits/chosen": -3.6715540885925293, "logits/rejected": -3.6815688610076904, "logps/chosen": -284.1375732421875, "logps/rejected": -285.5923767089844, "loss": 0.1526, "rewards/accuracies": 0.875, "rewards/chosen": 0.2228599190711975, "rewards/margins": 2.7908384799957275, "rewards/rejected": -2.567978858947754, "step": 3708 }, { "epoch": 0.43, "learning_rate": 1.7435327168442e-07, "logits/chosen": -3.2557315826416016, "logits/rejected": -3.175935983657837, "logps/chosen": -232.49562072753906, "logps/rejected": -274.1630859375, "loss": 0.1356, "rewards/accuracies": 1.0, "rewards/chosen": -0.15113241970539093, "rewards/margins": 2.4042086601257324, "rewards/rejected": -2.5553407669067383, "step": 3709 }, { "epoch": 0.43, "learning_rate": 1.7431815521479574e-07, "logits/chosen": -3.1184685230255127, "logits/rejected": -2.7624216079711914, "logps/chosen": -365.116455078125, "logps/rejected": -250.8306884765625, "loss": 0.4401, "rewards/accuracies": 0.875, "rewards/chosen": -0.11741462349891663, "rewards/margins": 1.4352836608886719, "rewards/rejected": -1.5526982545852661, "step": 3710 }, { "epoch": 0.43, "learning_rate": 1.7428303874517147e-07, "logits/chosen": -3.1036338806152344, "logits/rejected": -3.0900933742523193, "logps/chosen": -265.8157653808594, "logps/rejected": -313.23541259765625, "loss": 0.7364, "rewards/accuracies": 0.75, "rewards/chosen": -0.5526188611984253, "rewards/margins": 0.9299554824829102, "rewards/rejected": -1.482574224472046, "step": 3711 }, { "epoch": 0.43, "learning_rate": 1.7424792227554722e-07, "logits/chosen": -3.0043256282806396, "logits/rejected": -3.14552640914917, "logps/chosen": -272.4706115722656, "logps/rejected": -213.83880615234375, "loss": 0.2668, "rewards/accuracies": 0.875, "rewards/chosen": 0.0703638419508934, "rewards/margins": 2.5352463722229004, "rewards/rejected": -2.4648826122283936, "step": 3712 }, { "epoch": 0.43, "learning_rate": 1.7421280580592295e-07, "logits/chosen": -3.430011510848999, "logits/rejected": -3.286167621612549, "logps/chosen": -153.2526397705078, "logps/rejected": -192.38787841796875, "loss": 0.352, "rewards/accuracies": 0.75, "rewards/chosen": 0.0560891330242157, "rewards/margins": 2.607851266860962, "rewards/rejected": -2.551762342453003, "step": 3713 }, { "epoch": 0.43, "learning_rate": 1.741776893362987e-07, "logits/chosen": -2.9374959468841553, "logits/rejected": -2.735877513885498, "logps/chosen": -270.79620361328125, "logps/rejected": -233.01893615722656, "loss": 0.3584, "rewards/accuracies": 0.75, "rewards/chosen": 0.03279295191168785, "rewards/margins": 1.639037847518921, "rewards/rejected": -1.6062449216842651, "step": 3714 }, { "epoch": 0.43, "learning_rate": 1.7414257286667449e-07, "logits/chosen": -3.0995917320251465, "logits/rejected": -3.0616824626922607, "logps/chosen": -213.5299072265625, "logps/rejected": -208.32095336914062, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": 0.7068892121315002, "rewards/margins": 3.0374233722686768, "rewards/rejected": -2.3305342197418213, "step": 3715 }, { "epoch": 0.43, "learning_rate": 1.741074563970502e-07, "logits/chosen": -3.1602022647857666, "logits/rejected": -3.3269128799438477, "logps/chosen": -167.39398193359375, "logps/rejected": -263.35919189453125, "loss": 0.2527, "rewards/accuracies": 0.875, "rewards/chosen": -0.6800152063369751, "rewards/margins": 2.6010067462921143, "rewards/rejected": -3.281022071838379, "step": 3716 }, { "epoch": 0.43, "learning_rate": 1.7407233992742597e-07, "logits/chosen": -3.234210968017578, "logits/rejected": -3.3967504501342773, "logps/chosen": -195.07281494140625, "logps/rejected": -224.53060913085938, "loss": 0.2893, "rewards/accuracies": 0.875, "rewards/chosen": 0.08705976605415344, "rewards/margins": 2.4722471237182617, "rewards/rejected": -2.3851871490478516, "step": 3717 }, { "epoch": 0.43, "learning_rate": 1.740372234578017e-07, "logits/chosen": -2.4172005653381348, "logits/rejected": -2.2316784858703613, "logps/chosen": -399.85931396484375, "logps/rejected": -411.05389404296875, "loss": 0.6556, "rewards/accuracies": 0.625, "rewards/chosen": -0.06230878829956055, "rewards/margins": 0.8832308053970337, "rewards/rejected": -0.9455395936965942, "step": 3718 }, { "epoch": 0.43, "learning_rate": 1.7400210698817745e-07, "logits/chosen": -3.3011815547943115, "logits/rejected": -3.2946176528930664, "logps/chosen": -135.64401245117188, "logps/rejected": -176.7571258544922, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": 0.6940982937812805, "rewards/margins": 2.8332295417785645, "rewards/rejected": -2.1391313076019287, "step": 3719 }, { "epoch": 0.43, "learning_rate": 1.739669905185532e-07, "logits/chosen": -3.3727972507476807, "logits/rejected": -3.685126304626465, "logps/chosen": -250.3538360595703, "logps/rejected": -226.63839721679688, "loss": 0.1745, "rewards/accuracies": 1.0, "rewards/chosen": 0.14897069334983826, "rewards/margins": 2.8145577907562256, "rewards/rejected": -2.6655871868133545, "step": 3720 }, { "epoch": 0.43, "learning_rate": 1.7393187404892893e-07, "logits/chosen": -3.647068500518799, "logits/rejected": -3.5583415031433105, "logps/chosen": -202.49444580078125, "logps/rejected": -162.31961059570312, "loss": 1.1728, "rewards/accuracies": 0.5, "rewards/chosen": -0.8068774938583374, "rewards/margins": 0.5751646757125854, "rewards/rejected": -1.3820419311523438, "step": 3721 }, { "epoch": 0.43, "learning_rate": 1.7389675757930468e-07, "logits/chosen": -3.7852377891540527, "logits/rejected": -3.5614068508148193, "logps/chosen": -293.679443359375, "logps/rejected": -189.707275390625, "loss": 0.2695, "rewards/accuracies": 1.0, "rewards/chosen": -0.23389622569084167, "rewards/margins": 1.4923696517944336, "rewards/rejected": -1.726265788078308, "step": 3722 }, { "epoch": 0.43, "learning_rate": 1.7386164110968044e-07, "logits/chosen": -4.119531631469727, "logits/rejected": -3.844345808029175, "logps/chosen": -168.73733520507812, "logps/rejected": -120.65850830078125, "loss": 0.3971, "rewards/accuracies": 0.625, "rewards/chosen": 0.5362793803215027, "rewards/margins": 2.0037803649902344, "rewards/rejected": -1.4675010442733765, "step": 3723 }, { "epoch": 0.43, "learning_rate": 1.7382652464005616e-07, "logits/chosen": -2.9961299896240234, "logits/rejected": -3.2106330394744873, "logps/chosen": -140.75152587890625, "logps/rejected": -178.9505157470703, "loss": 0.5323, "rewards/accuracies": 0.75, "rewards/chosen": -0.24096640944480896, "rewards/margins": 0.8276091814041138, "rewards/rejected": -1.0685756206512451, "step": 3724 }, { "epoch": 0.43, "learning_rate": 1.7379140817043192e-07, "logits/chosen": -2.488757371902466, "logits/rejected": -2.621731758117676, "logps/chosen": -206.59608459472656, "logps/rejected": -297.89263916015625, "loss": 0.8511, "rewards/accuracies": 0.5, "rewards/chosen": -0.4363895058631897, "rewards/margins": 0.003753870725631714, "rewards/rejected": -0.440143346786499, "step": 3725 }, { "epoch": 0.43, "learning_rate": 1.7375629170080765e-07, "logits/chosen": -3.4577009677886963, "logits/rejected": -3.3677000999450684, "logps/chosen": -308.1956481933594, "logps/rejected": -261.90234375, "loss": 0.6174, "rewards/accuracies": 0.75, "rewards/chosen": -0.24059553444385529, "rewards/margins": 0.8099790215492249, "rewards/rejected": -1.0505746603012085, "step": 3726 }, { "epoch": 0.43, "learning_rate": 1.7372117523118343e-07, "logits/chosen": -3.6408326625823975, "logits/rejected": -3.6256370544433594, "logps/chosen": -173.5767364501953, "logps/rejected": -215.7896728515625, "loss": 0.3642, "rewards/accuracies": 0.875, "rewards/chosen": -0.6518775224685669, "rewards/margins": 1.1302363872528076, "rewards/rejected": -1.782113790512085, "step": 3727 }, { "epoch": 0.43, "learning_rate": 1.7368605876155918e-07, "logits/chosen": -2.5820603370666504, "logits/rejected": -2.4769129753112793, "logps/chosen": -270.1460876464844, "logps/rejected": -233.9062957763672, "loss": 0.5611, "rewards/accuracies": 0.5, "rewards/chosen": 0.4119492173194885, "rewards/margins": 0.6932536363601685, "rewards/rejected": -0.2813044488430023, "step": 3728 }, { "epoch": 0.43, "learning_rate": 1.736509422919349e-07, "logits/chosen": -3.1260123252868652, "logits/rejected": -3.0632143020629883, "logps/chosen": -311.21826171875, "logps/rejected": -293.10626220703125, "loss": 0.3418, "rewards/accuracies": 0.875, "rewards/chosen": 0.08253462612628937, "rewards/margins": 1.4716479778289795, "rewards/rejected": -1.389113426208496, "step": 3729 }, { "epoch": 0.43, "learning_rate": 1.7361582582231066e-07, "logits/chosen": -2.8321444988250732, "logits/rejected": -2.815842390060425, "logps/chosen": -397.676025390625, "logps/rejected": -257.3731689453125, "loss": 0.4558, "rewards/accuracies": 0.625, "rewards/chosen": -0.36626535654067993, "rewards/margins": 1.32305908203125, "rewards/rejected": -1.6893244981765747, "step": 3730 }, { "epoch": 0.43, "learning_rate": 1.7358070935268641e-07, "logits/chosen": -3.1440279483795166, "logits/rejected": -2.856450080871582, "logps/chosen": -201.34988403320312, "logps/rejected": -140.28057861328125, "loss": 0.4616, "rewards/accuracies": 0.75, "rewards/chosen": 0.07275040447711945, "rewards/margins": 1.1361089944839478, "rewards/rejected": -1.0633587837219238, "step": 3731 }, { "epoch": 0.43, "learning_rate": 1.7354559288306214e-07, "logits/chosen": -4.138597011566162, "logits/rejected": -4.043882846832275, "logps/chosen": -340.222900390625, "logps/rejected": -277.70758056640625, "loss": 0.3252, "rewards/accuracies": 0.875, "rewards/chosen": -0.17927579581737518, "rewards/margins": 1.4108481407165527, "rewards/rejected": -1.5901238918304443, "step": 3732 }, { "epoch": 0.43, "learning_rate": 1.735104764134379e-07, "logits/chosen": -2.805663585662842, "logits/rejected": -2.718316078186035, "logps/chosen": -342.8249206542969, "logps/rejected": -412.2599182128906, "loss": 0.2897, "rewards/accuracies": 0.875, "rewards/chosen": -0.16609282791614532, "rewards/margins": 3.0698304176330566, "rewards/rejected": -3.2359232902526855, "step": 3733 }, { "epoch": 0.43, "learning_rate": 1.7347535994381362e-07, "logits/chosen": -3.0370609760284424, "logits/rejected": -3.0466582775115967, "logps/chosen": -354.747802734375, "logps/rejected": -263.7821350097656, "loss": 0.5792, "rewards/accuracies": 0.75, "rewards/chosen": -0.8503535985946655, "rewards/margins": 0.4751267433166504, "rewards/rejected": -1.325480341911316, "step": 3734 }, { "epoch": 0.43, "learning_rate": 1.7344024347418938e-07, "logits/chosen": -2.662426710128784, "logits/rejected": -2.6668052673339844, "logps/chosen": -557.9041137695312, "logps/rejected": -413.247802734375, "loss": 0.3101, "rewards/accuracies": 0.875, "rewards/chosen": 0.011926829814910889, "rewards/margins": 2.16447114944458, "rewards/rejected": -2.1525444984436035, "step": 3735 }, { "epoch": 0.43, "learning_rate": 1.7340512700456513e-07, "logits/chosen": -3.9934606552124023, "logits/rejected": -3.525285482406616, "logps/chosen": -261.7499694824219, "logps/rejected": -169.47398376464844, "loss": 0.4763, "rewards/accuracies": 0.875, "rewards/chosen": 0.061528608202934265, "rewards/margins": 1.1511785984039307, "rewards/rejected": -1.08965003490448, "step": 3736 }, { "epoch": 0.43, "learning_rate": 1.7337001053494086e-07, "logits/chosen": -3.422666072845459, "logits/rejected": -4.150254726409912, "logps/chosen": -183.86012268066406, "logps/rejected": -279.6000061035156, "loss": 0.6761, "rewards/accuracies": 0.5, "rewards/chosen": -1.0509915351867676, "rewards/margins": 0.8170249462127686, "rewards/rejected": -1.8680164813995361, "step": 3737 }, { "epoch": 0.43, "learning_rate": 1.7333489406531664e-07, "logits/chosen": -3.2007670402526855, "logits/rejected": -3.1191704273223877, "logps/chosen": -96.04226684570312, "logps/rejected": -133.2779541015625, "loss": 0.6599, "rewards/accuracies": 0.75, "rewards/chosen": -0.5465809106826782, "rewards/margins": 0.23731614649295807, "rewards/rejected": -0.7838970422744751, "step": 3738 }, { "epoch": 0.43, "learning_rate": 1.732997775956924e-07, "logits/chosen": -2.756735324859619, "logits/rejected": -2.9342527389526367, "logps/chosen": -307.6697998046875, "logps/rejected": -215.6663360595703, "loss": 0.3546, "rewards/accuracies": 0.75, "rewards/chosen": 0.11521779745817184, "rewards/margins": 1.4246940612792969, "rewards/rejected": -1.3094762563705444, "step": 3739 }, { "epoch": 0.43, "learning_rate": 1.7326466112606812e-07, "logits/chosen": -3.2743749618530273, "logits/rejected": -3.680750608444214, "logps/chosen": -333.0386962890625, "logps/rejected": -244.85873413085938, "loss": 0.605, "rewards/accuracies": 0.5, "rewards/chosen": -0.7138423919677734, "rewards/margins": 1.7085479497909546, "rewards/rejected": -2.4223904609680176, "step": 3740 }, { "epoch": 0.43, "learning_rate": 1.7322954465644387e-07, "logits/chosen": -4.102250576019287, "logits/rejected": -4.1101460456848145, "logps/chosen": -258.7186279296875, "logps/rejected": -266.19683837890625, "loss": 0.1982, "rewards/accuracies": 1.0, "rewards/chosen": 0.1363341212272644, "rewards/margins": 3.0530388355255127, "rewards/rejected": -2.9167046546936035, "step": 3741 }, { "epoch": 0.43, "learning_rate": 1.731944281868196e-07, "logits/chosen": -2.7496142387390137, "logits/rejected": -2.7995071411132812, "logps/chosen": -209.58712768554688, "logps/rejected": -242.10256958007812, "loss": 0.5387, "rewards/accuracies": 0.75, "rewards/chosen": -0.572131335735321, "rewards/margins": 0.8373973965644836, "rewards/rejected": -1.4095287322998047, "step": 3742 }, { "epoch": 0.43, "learning_rate": 1.7315931171719536e-07, "logits/chosen": -3.748307228088379, "logits/rejected": -3.2961785793304443, "logps/chosen": -376.5100402832031, "logps/rejected": -241.3789825439453, "loss": 0.1951, "rewards/accuracies": 1.0, "rewards/chosen": 0.09915222227573395, "rewards/margins": 2.2838025093078613, "rewards/rejected": -2.184650421142578, "step": 3743 }, { "epoch": 0.43, "learning_rate": 1.731241952475711e-07, "logits/chosen": -3.3801889419555664, "logits/rejected": -3.3140628337860107, "logps/chosen": -205.1801300048828, "logps/rejected": -168.0063018798828, "loss": 1.0723, "rewards/accuracies": 0.625, "rewards/chosen": -1.024546504020691, "rewards/margins": 0.4213202893733978, "rewards/rejected": -1.4458668231964111, "step": 3744 }, { "epoch": 0.43, "learning_rate": 1.7308907877794684e-07, "logits/chosen": -3.621767520904541, "logits/rejected": -3.583552837371826, "logps/chosen": -223.68772888183594, "logps/rejected": -161.5611114501953, "loss": 0.3444, "rewards/accuracies": 0.875, "rewards/chosen": -0.0510859340429306, "rewards/margins": 1.5853151082992554, "rewards/rejected": -1.6364010572433472, "step": 3745 }, { "epoch": 0.43, "learning_rate": 1.730539623083226e-07, "logits/chosen": -3.3164844512939453, "logits/rejected": -3.103489637374878, "logps/chosen": -266.2097473144531, "logps/rejected": -282.68487548828125, "loss": 0.3769, "rewards/accuracies": 0.75, "rewards/chosen": -0.4207172989845276, "rewards/margins": 1.3649448156356812, "rewards/rejected": -1.7856621742248535, "step": 3746 }, { "epoch": 0.43, "learning_rate": 1.7301884583869837e-07, "logits/chosen": -2.949655532836914, "logits/rejected": -3.2077231407165527, "logps/chosen": -271.07037353515625, "logps/rejected": -225.57838439941406, "loss": 0.3926, "rewards/accuracies": 0.875, "rewards/chosen": -0.26247358322143555, "rewards/margins": 1.007907509803772, "rewards/rejected": -1.270380973815918, "step": 3747 }, { "epoch": 0.43, "learning_rate": 1.7298372936907407e-07, "logits/chosen": -2.7848761081695557, "logits/rejected": -2.8310413360595703, "logps/chosen": -120.28494262695312, "logps/rejected": -199.54537963867188, "loss": 0.3203, "rewards/accuracies": 1.0, "rewards/chosen": -0.40132513642311096, "rewards/margins": 1.4209840297698975, "rewards/rejected": -1.8223092555999756, "step": 3748 }, { "epoch": 0.43, "learning_rate": 1.7294861289944985e-07, "logits/chosen": -3.4358983039855957, "logits/rejected": -3.021350860595703, "logps/chosen": -209.57484436035156, "logps/rejected": -181.9486541748047, "loss": 0.3157, "rewards/accuracies": 0.875, "rewards/chosen": -0.03155313432216644, "rewards/margins": 1.6744012832641602, "rewards/rejected": -1.7059543132781982, "step": 3749 }, { "epoch": 0.43, "learning_rate": 1.7291349642982558e-07, "logits/chosen": -3.9762468338012695, "logits/rejected": -3.746121883392334, "logps/chosen": -254.10276794433594, "logps/rejected": -291.08160400390625, "loss": 0.613, "rewards/accuracies": 0.75, "rewards/chosen": -0.9535631537437439, "rewards/margins": 1.3152574300765991, "rewards/rejected": -2.2688205242156982, "step": 3750 }, { "epoch": 0.43, "learning_rate": 1.7287837996020133e-07, "logits/chosen": -3.0338549613952637, "logits/rejected": -3.205471992492676, "logps/chosen": -245.88043212890625, "logps/rejected": -345.44879150390625, "loss": 0.2955, "rewards/accuracies": 0.75, "rewards/chosen": 0.6738663911819458, "rewards/margins": 2.2599024772644043, "rewards/rejected": -1.5860360860824585, "step": 3751 }, { "epoch": 0.43, "learning_rate": 1.728432634905771e-07, "logits/chosen": -3.031090021133423, "logits/rejected": -3.026207447052002, "logps/chosen": -182.75103759765625, "logps/rejected": -260.35736083984375, "loss": 0.4836, "rewards/accuracies": 0.75, "rewards/chosen": -0.3902100920677185, "rewards/margins": 2.264827013015747, "rewards/rejected": -2.6550369262695312, "step": 3752 }, { "epoch": 0.43, "learning_rate": 1.7280814702095281e-07, "logits/chosen": -3.0521602630615234, "logits/rejected": -2.8582916259765625, "logps/chosen": -260.48919677734375, "logps/rejected": -175.30770874023438, "loss": 0.3976, "rewards/accuracies": 0.75, "rewards/chosen": -0.0617675706744194, "rewards/margins": 1.4435043334960938, "rewards/rejected": -1.5052720308303833, "step": 3753 }, { "epoch": 0.43, "learning_rate": 1.7277303055132857e-07, "logits/chosen": -3.5169730186462402, "logits/rejected": -3.267887592315674, "logps/chosen": -179.4544677734375, "logps/rejected": -171.11480712890625, "loss": 0.5368, "rewards/accuracies": 0.625, "rewards/chosen": 0.24344421923160553, "rewards/margins": 0.7576186656951904, "rewards/rejected": -0.5141744613647461, "step": 3754 }, { "epoch": 0.43, "learning_rate": 1.7273791408170432e-07, "logits/chosen": -3.2177040576934814, "logits/rejected": -3.2704086303710938, "logps/chosen": -311.4418029785156, "logps/rejected": -290.2576904296875, "loss": 0.2421, "rewards/accuracies": 0.875, "rewards/chosen": 0.08006151020526886, "rewards/margins": 2.496225357055664, "rewards/rejected": -2.416163682937622, "step": 3755 }, { "epoch": 0.43, "learning_rate": 1.7270279761208005e-07, "logits/chosen": -2.321563482284546, "logits/rejected": -2.388504981994629, "logps/chosen": -403.6138916015625, "logps/rejected": -243.88693237304688, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": 0.7375403642654419, "rewards/margins": 1.9392553567886353, "rewards/rejected": -1.2017149925231934, "step": 3756 }, { "epoch": 0.43, "learning_rate": 1.726676811424558e-07, "logits/chosen": -3.4286608695983887, "logits/rejected": -3.783137798309326, "logps/chosen": -210.37037658691406, "logps/rejected": -304.6061706542969, "loss": 0.5576, "rewards/accuracies": 0.625, "rewards/chosen": 0.49288368225097656, "rewards/margins": 0.9996010065078735, "rewards/rejected": -0.5067174434661865, "step": 3757 }, { "epoch": 0.43, "learning_rate": 1.7263256467283153e-07, "logits/chosen": -2.32926344871521, "logits/rejected": -2.346259832382202, "logps/chosen": -231.8414764404297, "logps/rejected": -225.57867431640625, "loss": 0.3891, "rewards/accuracies": 0.75, "rewards/chosen": 0.5577883720397949, "rewards/margins": 1.288053035736084, "rewards/rejected": -0.7302647829055786, "step": 3758 }, { "epoch": 0.43, "learning_rate": 1.7259744820320728e-07, "logits/chosen": -3.015768527984619, "logits/rejected": -2.9672882556915283, "logps/chosen": -216.74713134765625, "logps/rejected": -222.57928466796875, "loss": 0.3199, "rewards/accuracies": 0.875, "rewards/chosen": -0.046950459480285645, "rewards/margins": 2.1382627487182617, "rewards/rejected": -2.185213327407837, "step": 3759 }, { "epoch": 0.43, "learning_rate": 1.7256233173358306e-07, "logits/chosen": -2.8012619018554688, "logits/rejected": -2.857393264770508, "logps/chosen": -482.74237060546875, "logps/rejected": -297.6199951171875, "loss": 0.324, "rewards/accuracies": 0.75, "rewards/chosen": 0.3245088458061218, "rewards/margins": 2.26320481300354, "rewards/rejected": -1.938696026802063, "step": 3760 }, { "epoch": 0.43, "learning_rate": 1.725272152639588e-07, "logits/chosen": -3.3329856395721436, "logits/rejected": -3.1899263858795166, "logps/chosen": -255.1383819580078, "logps/rejected": -296.7931213378906, "loss": 0.2929, "rewards/accuracies": 0.75, "rewards/chosen": -0.25162559747695923, "rewards/margins": 3.2806930541992188, "rewards/rejected": -3.532318592071533, "step": 3761 }, { "epoch": 0.43, "learning_rate": 1.7249209879433455e-07, "logits/chosen": -3.172010660171509, "logits/rejected": -2.808654546737671, "logps/chosen": -268.28656005859375, "logps/rejected": -240.1605224609375, "loss": 0.4486, "rewards/accuracies": 0.625, "rewards/chosen": 0.09425263106822968, "rewards/margins": 1.5832979679107666, "rewards/rejected": -1.4890453815460205, "step": 3762 }, { "epoch": 0.43, "learning_rate": 1.7245698232471027e-07, "logits/chosen": -3.5375232696533203, "logits/rejected": -3.709341526031494, "logps/chosen": -220.43634033203125, "logps/rejected": -221.7480010986328, "loss": 0.2565, "rewards/accuracies": 0.875, "rewards/chosen": 0.2044648975133896, "rewards/margins": 1.8968849182128906, "rewards/rejected": -1.6924200057983398, "step": 3763 }, { "epoch": 0.43, "learning_rate": 1.7242186585508603e-07, "logits/chosen": -3.24182391166687, "logits/rejected": -3.2131035327911377, "logps/chosen": -242.82884216308594, "logps/rejected": -281.5714111328125, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": 0.1911657750606537, "rewards/margins": 3.1873292922973633, "rewards/rejected": -2.9961633682250977, "step": 3764 }, { "epoch": 0.43, "learning_rate": 1.7238674938546178e-07, "logits/chosen": -2.767106056213379, "logits/rejected": -2.4081058502197266, "logps/chosen": -290.01239013671875, "logps/rejected": -484.33319091796875, "loss": 0.5825, "rewards/accuracies": 0.75, "rewards/chosen": -0.1530022919178009, "rewards/margins": 0.7826467752456665, "rewards/rejected": -0.935649037361145, "step": 3765 }, { "epoch": 0.43, "learning_rate": 1.723516329158375e-07, "logits/chosen": -2.8416781425476074, "logits/rejected": -2.5857510566711426, "logps/chosen": -386.83282470703125, "logps/rejected": -217.19271850585938, "loss": 0.5377, "rewards/accuracies": 0.75, "rewards/chosen": 0.2793276906013489, "rewards/margins": 1.8353736400604248, "rewards/rejected": -1.5560457706451416, "step": 3766 }, { "epoch": 0.43, "learning_rate": 1.7231651644621326e-07, "logits/chosen": -2.7575252056121826, "logits/rejected": -2.584373712539673, "logps/chosen": -279.595947265625, "logps/rejected": -392.8921203613281, "loss": 0.3786, "rewards/accuracies": 0.75, "rewards/chosen": 0.09211406856775284, "rewards/margins": 3.1375341415405273, "rewards/rejected": -3.045419931411743, "step": 3767 }, { "epoch": 0.43, "learning_rate": 1.7228139997658902e-07, "logits/chosen": -3.4872541427612305, "logits/rejected": -3.5614116191864014, "logps/chosen": -320.3623046875, "logps/rejected": -273.0469055175781, "loss": 0.6961, "rewards/accuracies": 0.5, "rewards/chosen": 0.12884853780269623, "rewards/margins": 0.7124890089035034, "rewards/rejected": -0.5836405158042908, "step": 3768 }, { "epoch": 0.43, "learning_rate": 1.7224628350696474e-07, "logits/chosen": -3.2136921882629395, "logits/rejected": -2.9907727241516113, "logps/chosen": -311.1197204589844, "logps/rejected": -283.3961486816406, "loss": 0.2554, "rewards/accuracies": 1.0, "rewards/chosen": -0.01953045278787613, "rewards/margins": 1.861210823059082, "rewards/rejected": -1.8807411193847656, "step": 3769 }, { "epoch": 0.43, "learning_rate": 1.722111670373405e-07, "logits/chosen": -2.921227216720581, "logits/rejected": -2.9075517654418945, "logps/chosen": -140.94256591796875, "logps/rejected": -171.49383544921875, "loss": 0.3574, "rewards/accuracies": 0.75, "rewards/chosen": -0.11262717843055725, "rewards/margins": 1.8568111658096313, "rewards/rejected": -1.9694383144378662, "step": 3770 }, { "epoch": 0.43, "learning_rate": 1.7217605056771622e-07, "logits/chosen": -3.271697759628296, "logits/rejected": -3.0364036560058594, "logps/chosen": -601.8358764648438, "logps/rejected": -228.93771362304688, "loss": 0.4102, "rewards/accuracies": 0.75, "rewards/chosen": -0.6326030492782593, "rewards/margins": 2.4144575595855713, "rewards/rejected": -3.047060489654541, "step": 3771 }, { "epoch": 0.43, "learning_rate": 1.72140934098092e-07, "logits/chosen": -3.837602376937866, "logits/rejected": -3.569702625274658, "logps/chosen": -294.4512634277344, "logps/rejected": -239.8753204345703, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": 0.32728689908981323, "rewards/margins": 2.192551612854004, "rewards/rejected": -1.8652647733688354, "step": 3772 }, { "epoch": 0.43, "learning_rate": 1.7210581762846776e-07, "logits/chosen": -2.7564144134521484, "logits/rejected": -2.894111156463623, "logps/chosen": -262.4818115234375, "logps/rejected": -287.537353515625, "loss": 0.8863, "rewards/accuracies": 0.375, "rewards/chosen": -0.8129072785377502, "rewards/margins": 0.1512846052646637, "rewards/rejected": -0.9641919136047363, "step": 3773 }, { "epoch": 0.44, "learning_rate": 1.7207070115884349e-07, "logits/chosen": -2.9237234592437744, "logits/rejected": -3.1189920902252197, "logps/chosen": -157.84848022460938, "logps/rejected": -245.73715209960938, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": 0.2860395610332489, "rewards/margins": 3.3897221088409424, "rewards/rejected": -3.103682518005371, "step": 3774 }, { "epoch": 0.44, "learning_rate": 1.7203558468921924e-07, "logits/chosen": -2.9473421573638916, "logits/rejected": -3.088089942932129, "logps/chosen": -230.54855346679688, "logps/rejected": -180.13418579101562, "loss": 0.3523, "rewards/accuracies": 0.875, "rewards/chosen": -0.2866535782814026, "rewards/margins": 1.3763785362243652, "rewards/rejected": -1.6630322933197021, "step": 3775 }, { "epoch": 0.44, "learning_rate": 1.72000468219595e-07, "logits/chosen": -2.932097911834717, "logits/rejected": -2.9439802169799805, "logps/chosen": -413.0455627441406, "logps/rejected": -314.57952880859375, "loss": 0.2752, "rewards/accuracies": 0.875, "rewards/chosen": -0.2476312518119812, "rewards/margins": 1.8542098999023438, "rewards/rejected": -2.1018409729003906, "step": 3776 }, { "epoch": 0.44, "learning_rate": 1.7196535174997072e-07, "logits/chosen": -3.646982192993164, "logits/rejected": -3.5575196743011475, "logps/chosen": -363.8162536621094, "logps/rejected": -284.8108215332031, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -0.15030869841575623, "rewards/margins": 0.9669332504272461, "rewards/rejected": -1.1172419786453247, "step": 3777 }, { "epoch": 0.44, "learning_rate": 1.7193023528034648e-07, "logits/chosen": -3.6191353797912598, "logits/rejected": -3.0513086318969727, "logps/chosen": -154.88742065429688, "logps/rejected": -191.8935546875, "loss": 0.4521, "rewards/accuracies": 0.75, "rewards/chosen": -0.5188064575195312, "rewards/margins": 2.0743510723114014, "rewards/rejected": -2.5931572914123535, "step": 3778 }, { "epoch": 0.44, "learning_rate": 1.718951188107222e-07, "logits/chosen": -3.2513442039489746, "logits/rejected": -3.0559425354003906, "logps/chosen": -340.1258239746094, "logps/rejected": -213.18914794921875, "loss": 0.2692, "rewards/accuracies": 0.875, "rewards/chosen": 0.23339098691940308, "rewards/margins": 1.9659757614135742, "rewards/rejected": -1.732584834098816, "step": 3779 }, { "epoch": 0.44, "learning_rate": 1.7186000234109796e-07, "logits/chosen": -3.9758970737457275, "logits/rejected": -3.5776596069335938, "logps/chosen": -285.97210693359375, "logps/rejected": -251.82049560546875, "loss": 0.1721, "rewards/accuracies": 0.875, "rewards/chosen": 0.39764294028282166, "rewards/margins": 3.1666245460510254, "rewards/rejected": -2.7689812183380127, "step": 3780 }, { "epoch": 0.44, "learning_rate": 1.718248858714737e-07, "logits/chosen": -3.3483729362487793, "logits/rejected": -3.2296245098114014, "logps/chosen": -150.41799926757812, "logps/rejected": -184.947021484375, "loss": 0.4254, "rewards/accuracies": 0.75, "rewards/chosen": -0.3149542808532715, "rewards/margins": 1.4023830890655518, "rewards/rejected": -1.7173373699188232, "step": 3781 }, { "epoch": 0.44, "learning_rate": 1.7178976940184944e-07, "logits/chosen": -3.3945565223693848, "logits/rejected": -3.072197914123535, "logps/chosen": -397.41107177734375, "logps/rejected": -263.0228271484375, "loss": 0.2505, "rewards/accuracies": 1.0, "rewards/chosen": -0.30891740322113037, "rewards/margins": 1.6927427053451538, "rewards/rejected": -2.001660108566284, "step": 3782 }, { "epoch": 0.44, "learning_rate": 1.7175465293222522e-07, "logits/chosen": -3.784128189086914, "logits/rejected": -3.7780697345733643, "logps/chosen": -211.1038818359375, "logps/rejected": -199.1459197998047, "loss": 0.2644, "rewards/accuracies": 0.875, "rewards/chosen": 0.38433822989463806, "rewards/margins": 1.9595481157302856, "rewards/rejected": -1.5752098560333252, "step": 3783 }, { "epoch": 0.44, "learning_rate": 1.7171953646260097e-07, "logits/chosen": -3.6336989402770996, "logits/rejected": -3.193356990814209, "logps/chosen": -223.29385375976562, "logps/rejected": -184.8057861328125, "loss": 0.2845, "rewards/accuracies": 1.0, "rewards/chosen": -0.4022286534309387, "rewards/margins": 1.5733940601348877, "rewards/rejected": -1.9756226539611816, "step": 3784 }, { "epoch": 0.44, "learning_rate": 1.716844199929767e-07, "logits/chosen": -3.471452236175537, "logits/rejected": -3.5129008293151855, "logps/chosen": -206.58956909179688, "logps/rejected": -193.74070739746094, "loss": 0.4442, "rewards/accuracies": 0.75, "rewards/chosen": -0.3440912961959839, "rewards/margins": 1.327117681503296, "rewards/rejected": -1.6712090969085693, "step": 3785 }, { "epoch": 0.44, "learning_rate": 1.7164930352335245e-07, "logits/chosen": -3.1886954307556152, "logits/rejected": -3.4079842567443848, "logps/chosen": -238.61163330078125, "logps/rejected": -235.8498077392578, "loss": 0.606, "rewards/accuracies": 0.75, "rewards/chosen": -0.34245002269744873, "rewards/margins": 1.023464322090149, "rewards/rejected": -1.365914225578308, "step": 3786 }, { "epoch": 0.44, "learning_rate": 1.7161418705372818e-07, "logits/chosen": -2.654240608215332, "logits/rejected": -2.445498466491699, "logps/chosen": -302.33251953125, "logps/rejected": -149.2390594482422, "loss": 0.7057, "rewards/accuracies": 0.625, "rewards/chosen": -0.33185315132141113, "rewards/margins": 0.4227653741836548, "rewards/rejected": -0.7546184062957764, "step": 3787 }, { "epoch": 0.44, "learning_rate": 1.7157907058410393e-07, "logits/chosen": -3.180511951446533, "logits/rejected": -3.46157169342041, "logps/chosen": -283.07147216796875, "logps/rejected": -301.0265197753906, "loss": 0.4253, "rewards/accuracies": 0.75, "rewards/chosen": -0.16283874213695526, "rewards/margins": 2.060246467590332, "rewards/rejected": -2.223085403442383, "step": 3788 }, { "epoch": 0.44, "learning_rate": 1.715439541144797e-07, "logits/chosen": -2.825237274169922, "logits/rejected": -2.553175926208496, "logps/chosen": -414.92095947265625, "logps/rejected": -333.3368835449219, "loss": 0.193, "rewards/accuracies": 1.0, "rewards/chosen": -0.07892120629549026, "rewards/margins": 2.303670883178711, "rewards/rejected": -2.38259220123291, "step": 3789 }, { "epoch": 0.44, "learning_rate": 1.7150883764485542e-07, "logits/chosen": -3.8301186561584473, "logits/rejected": -3.7145628929138184, "logps/chosen": -262.1764831542969, "logps/rejected": -131.44866943359375, "loss": 0.3186, "rewards/accuracies": 0.875, "rewards/chosen": 0.22470420598983765, "rewards/margins": 2.341282367706299, "rewards/rejected": -2.1165781021118164, "step": 3790 }, { "epoch": 0.44, "learning_rate": 1.7147372117523117e-07, "logits/chosen": -3.1890060901641846, "logits/rejected": -2.943732500076294, "logps/chosen": -360.22442626953125, "logps/rejected": -377.3102111816406, "loss": 0.2609, "rewards/accuracies": 0.875, "rewards/chosen": 0.3285140097141266, "rewards/margins": 2.470351457595825, "rewards/rejected": -2.1418375968933105, "step": 3791 }, { "epoch": 0.44, "learning_rate": 1.7143860470560695e-07, "logits/chosen": -2.6706409454345703, "logits/rejected": -2.5234575271606445, "logps/chosen": -349.8684997558594, "logps/rejected": -310.84259033203125, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.14751070737838745, "rewards/margins": 0.8692659139633179, "rewards/rejected": -1.01677668094635, "step": 3792 }, { "epoch": 0.44, "learning_rate": 1.7140348823598265e-07, "logits/chosen": -3.367725133895874, "logits/rejected": -2.971841335296631, "logps/chosen": -309.6128234863281, "logps/rejected": -317.1749267578125, "loss": 0.3931, "rewards/accuracies": 0.875, "rewards/chosen": 0.03316380828619003, "rewards/margins": 1.6719704866409302, "rewards/rejected": -1.638806700706482, "step": 3793 }, { "epoch": 0.44, "learning_rate": 1.7136837176635843e-07, "logits/chosen": -2.906613826751709, "logits/rejected": -3.1899733543395996, "logps/chosen": -395.36065673828125, "logps/rejected": -236.57327270507812, "loss": 0.5778, "rewards/accuracies": 0.75, "rewards/chosen": -0.1592264175415039, "rewards/margins": 1.1905279159545898, "rewards/rejected": -1.3497543334960938, "step": 3794 }, { "epoch": 0.44, "learning_rate": 1.7133325529673416e-07, "logits/chosen": -3.7073416709899902, "logits/rejected": -3.016030788421631, "logps/chosen": -264.8912353515625, "logps/rejected": -239.44078063964844, "loss": 0.436, "rewards/accuracies": 0.75, "rewards/chosen": -0.1900431513786316, "rewards/margins": 1.4997045993804932, "rewards/rejected": -1.6897478103637695, "step": 3795 }, { "epoch": 0.44, "learning_rate": 1.712981388271099e-07, "logits/chosen": -4.088573932647705, "logits/rejected": -4.187191963195801, "logps/chosen": -167.92295837402344, "logps/rejected": -159.87496948242188, "loss": 0.2792, "rewards/accuracies": 0.875, "rewards/chosen": -0.018006712198257446, "rewards/margins": 1.915718674659729, "rewards/rejected": -1.933725357055664, "step": 3796 }, { "epoch": 0.44, "learning_rate": 1.7126302235748567e-07, "logits/chosen": -3.145259380340576, "logits/rejected": -3.128885269165039, "logps/chosen": -339.15179443359375, "logps/rejected": -443.16558837890625, "loss": 0.1871, "rewards/accuracies": 1.0, "rewards/chosen": 0.36675554513931274, "rewards/margins": 2.5216948986053467, "rewards/rejected": -2.1549394130706787, "step": 3797 }, { "epoch": 0.44, "learning_rate": 1.712279058878614e-07, "logits/chosen": -3.3041372299194336, "logits/rejected": -3.260498285293579, "logps/chosen": -514.44287109375, "logps/rejected": -352.6444091796875, "loss": 0.1446, "rewards/accuracies": 1.0, "rewards/chosen": 0.413865864276886, "rewards/margins": 3.511106491088867, "rewards/rejected": -3.097240686416626, "step": 3798 }, { "epoch": 0.44, "learning_rate": 1.7119278941823715e-07, "logits/chosen": -2.978891134262085, "logits/rejected": -2.7410731315612793, "logps/chosen": -175.6814727783203, "logps/rejected": -185.3964080810547, "loss": 0.5616, "rewards/accuracies": 0.75, "rewards/chosen": -0.276872456073761, "rewards/margins": 0.5685970783233643, "rewards/rejected": -0.84546959400177, "step": 3799 }, { "epoch": 0.44, "learning_rate": 1.711576729486129e-07, "logits/chosen": -2.6903491020202637, "logits/rejected": -2.850292921066284, "logps/chosen": -132.75621032714844, "logps/rejected": -304.98046875, "loss": 0.2971, "rewards/accuracies": 0.875, "rewards/chosen": -0.03744758665561676, "rewards/margins": 2.0476841926574707, "rewards/rejected": -2.0851316452026367, "step": 3800 }, { "epoch": 0.44, "learning_rate": 1.7112255647898863e-07, "logits/chosen": -3.384897470474243, "logits/rejected": -3.6092803478240967, "logps/chosen": -119.74797058105469, "logps/rejected": -150.85470581054688, "loss": 0.3868, "rewards/accuracies": 0.625, "rewards/chosen": 0.5893073081970215, "rewards/margins": 2.45082426071167, "rewards/rejected": -1.8615169525146484, "step": 3801 }, { "epoch": 0.44, "learning_rate": 1.7108744000936438e-07, "logits/chosen": -3.722026824951172, "logits/rejected": -3.499962329864502, "logps/chosen": -280.06085205078125, "logps/rejected": -276.5624084472656, "loss": 0.3727, "rewards/accuracies": 0.75, "rewards/chosen": -0.26659050583839417, "rewards/margins": 1.6953930854797363, "rewards/rejected": -1.9619836807250977, "step": 3802 }, { "epoch": 0.44, "learning_rate": 1.710523235397401e-07, "logits/chosen": -3.38175106048584, "logits/rejected": -3.1147313117980957, "logps/chosen": -286.04766845703125, "logps/rejected": -287.004150390625, "loss": 0.8426, "rewards/accuracies": 0.75, "rewards/chosen": -0.4416065514087677, "rewards/margins": 1.2828757762908936, "rewards/rejected": -1.7244822978973389, "step": 3803 }, { "epoch": 0.44, "learning_rate": 1.7101720707011586e-07, "logits/chosen": -3.362394332885742, "logits/rejected": -3.0913658142089844, "logps/chosen": -104.05601501464844, "logps/rejected": -131.61489868164062, "loss": 0.5674, "rewards/accuracies": 0.625, "rewards/chosen": -0.7701177000999451, "rewards/margins": 0.5198270082473755, "rewards/rejected": -1.2899446487426758, "step": 3804 }, { "epoch": 0.44, "learning_rate": 1.7098209060049164e-07, "logits/chosen": -2.964688539505005, "logits/rejected": -2.905113935470581, "logps/chosen": -317.737548828125, "logps/rejected": -255.02978515625, "loss": 0.5529, "rewards/accuracies": 0.625, "rewards/chosen": -0.05295287072658539, "rewards/margins": 1.0584172010421753, "rewards/rejected": -1.1113699674606323, "step": 3805 }, { "epoch": 0.44, "learning_rate": 1.7094697413086737e-07, "logits/chosen": -2.352909803390503, "logits/rejected": -2.485879421234131, "logps/chosen": -349.4520263671875, "logps/rejected": -396.7132263183594, "loss": 0.2704, "rewards/accuracies": 0.875, "rewards/chosen": 0.5364240407943726, "rewards/margins": 1.7808735370635986, "rewards/rejected": -1.2444497346878052, "step": 3806 }, { "epoch": 0.44, "learning_rate": 1.7091185766124313e-07, "logits/chosen": -3.4424283504486084, "logits/rejected": -3.2578954696655273, "logps/chosen": -224.66932678222656, "logps/rejected": -236.7154083251953, "loss": 0.4627, "rewards/accuracies": 0.75, "rewards/chosen": -0.3880527913570404, "rewards/margins": 0.9170973896980286, "rewards/rejected": -1.3051502704620361, "step": 3807 }, { "epoch": 0.44, "learning_rate": 1.7087674119161885e-07, "logits/chosen": -2.9239816665649414, "logits/rejected": -2.7686431407928467, "logps/chosen": -279.962890625, "logps/rejected": -280.96533203125, "loss": 0.2851, "rewards/accuracies": 0.75, "rewards/chosen": 0.7070999145507812, "rewards/margins": 2.0233676433563232, "rewards/rejected": -1.3162678480148315, "step": 3808 }, { "epoch": 0.44, "learning_rate": 1.708416247219946e-07, "logits/chosen": -2.91094708442688, "logits/rejected": -2.834409236907959, "logps/chosen": -206.5435333251953, "logps/rejected": -348.33905029296875, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": -0.100542813539505, "rewards/margins": 0.9231792092323303, "rewards/rejected": -1.0237220525741577, "step": 3809 }, { "epoch": 0.44, "learning_rate": 1.7080650825237036e-07, "logits/chosen": -3.069371461868286, "logits/rejected": -3.2963435649871826, "logps/chosen": -322.9842834472656, "logps/rejected": -290.70294189453125, "loss": 0.2629, "rewards/accuracies": 0.875, "rewards/chosen": 0.1866873800754547, "rewards/margins": 2.3205254077911377, "rewards/rejected": -2.133838176727295, "step": 3810 }, { "epoch": 0.44, "learning_rate": 1.707713917827461e-07, "logits/chosen": -3.197383165359497, "logits/rejected": -3.3959131240844727, "logps/chosen": -243.641845703125, "logps/rejected": -312.5992431640625, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.19247426092624664, "rewards/margins": 2.029167890548706, "rewards/rejected": -2.221642255783081, "step": 3811 }, { "epoch": 0.44, "learning_rate": 1.7073627531312184e-07, "logits/chosen": -3.0703818798065186, "logits/rejected": -3.1225194931030273, "logps/chosen": -169.8203887939453, "logps/rejected": -267.2319030761719, "loss": 0.417, "rewards/accuracies": 0.75, "rewards/chosen": 0.06669960170984268, "rewards/margins": 2.3917927742004395, "rewards/rejected": -2.3250932693481445, "step": 3812 }, { "epoch": 0.44, "learning_rate": 1.707011588434976e-07, "logits/chosen": -3.377410650253296, "logits/rejected": -3.4691505432128906, "logps/chosen": -440.074462890625, "logps/rejected": -498.91485595703125, "loss": 0.4498, "rewards/accuracies": 0.75, "rewards/chosen": -0.12108853459358215, "rewards/margins": 3.088165760040283, "rewards/rejected": -3.209254503250122, "step": 3813 }, { "epoch": 0.44, "learning_rate": 1.7066604237387332e-07, "logits/chosen": -3.2385759353637695, "logits/rejected": -3.221871852874756, "logps/chosen": -396.2362060546875, "logps/rejected": -305.79925537109375, "loss": 0.6136, "rewards/accuracies": 0.625, "rewards/chosen": -0.09719810634851456, "rewards/margins": 0.8486742973327637, "rewards/rejected": -0.9458723068237305, "step": 3814 }, { "epoch": 0.44, "learning_rate": 1.7063092590424908e-07, "logits/chosen": -3.0420916080474854, "logits/rejected": -3.44242525100708, "logps/chosen": -207.71365356445312, "logps/rejected": -215.11241149902344, "loss": 0.2066, "rewards/accuracies": 1.0, "rewards/chosen": -0.11452966928482056, "rewards/margins": 2.885307550430298, "rewards/rejected": -2.9998371601104736, "step": 3815 }, { "epoch": 0.44, "learning_rate": 1.705958094346248e-07, "logits/chosen": -2.3081603050231934, "logits/rejected": -2.5343337059020996, "logps/chosen": -187.44094848632812, "logps/rejected": -200.67991638183594, "loss": 0.8023, "rewards/accuracies": 0.5, "rewards/chosen": -0.5093955397605896, "rewards/margins": 0.6127266883850098, "rewards/rejected": -1.1221221685409546, "step": 3816 }, { "epoch": 0.44, "learning_rate": 1.7056069296500058e-07, "logits/chosen": -2.81065034866333, "logits/rejected": -2.7185287475585938, "logps/chosen": -166.5787811279297, "logps/rejected": -156.59622192382812, "loss": 0.4313, "rewards/accuracies": 0.75, "rewards/chosen": 0.4592604339122772, "rewards/margins": 1.682938814163208, "rewards/rejected": -1.2236783504486084, "step": 3817 }, { "epoch": 0.44, "learning_rate": 1.7052557649537634e-07, "logits/chosen": -3.3730549812316895, "logits/rejected": -3.2173471450805664, "logps/chosen": -272.0760192871094, "logps/rejected": -369.3324279785156, "loss": 0.3986, "rewards/accuracies": 0.75, "rewards/chosen": -0.3932245373725891, "rewards/margins": 1.6551427841186523, "rewards/rejected": -2.0483672618865967, "step": 3818 }, { "epoch": 0.44, "learning_rate": 1.7049046002575207e-07, "logits/chosen": -3.074230670928955, "logits/rejected": -3.325981616973877, "logps/chosen": -208.9912872314453, "logps/rejected": -241.4286346435547, "loss": 0.4788, "rewards/accuracies": 0.75, "rewards/chosen": -0.05713619291782379, "rewards/margins": 1.1786962747573853, "rewards/rejected": -1.2358324527740479, "step": 3819 }, { "epoch": 0.44, "learning_rate": 1.7045534355612782e-07, "logits/chosen": -3.706920623779297, "logits/rejected": -3.5391039848327637, "logps/chosen": -191.14913940429688, "logps/rejected": -220.66818237304688, "loss": 0.3089, "rewards/accuracies": 0.75, "rewards/chosen": -0.10462673008441925, "rewards/margins": 2.4020633697509766, "rewards/rejected": -2.50669002532959, "step": 3820 }, { "epoch": 0.44, "learning_rate": 1.7042022708650357e-07, "logits/chosen": -2.6009182929992676, "logits/rejected": -2.45725154876709, "logps/chosen": -111.2066421508789, "logps/rejected": -121.3702163696289, "loss": 1.0407, "rewards/accuracies": 0.5, "rewards/chosen": -1.2581225633621216, "rewards/margins": -0.01598186045885086, "rewards/rejected": -1.2421408891677856, "step": 3821 }, { "epoch": 0.44, "learning_rate": 1.703851106168793e-07, "logits/chosen": -3.23268985748291, "logits/rejected": -3.356743335723877, "logps/chosen": -90.07105255126953, "logps/rejected": -192.77630615234375, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": 0.057198844850063324, "rewards/margins": 1.8842376470565796, "rewards/rejected": -1.8270388841629028, "step": 3822 }, { "epoch": 0.44, "learning_rate": 1.7034999414725505e-07, "logits/chosen": -2.639680862426758, "logits/rejected": -2.80167555809021, "logps/chosen": -200.57904052734375, "logps/rejected": -316.2478332519531, "loss": 0.3836, "rewards/accuracies": 0.75, "rewards/chosen": 0.05684966593980789, "rewards/margins": 2.7982258796691895, "rewards/rejected": -2.7413763999938965, "step": 3823 }, { "epoch": 0.44, "learning_rate": 1.7031487767763078e-07, "logits/chosen": -3.003185510635376, "logits/rejected": -3.0988316535949707, "logps/chosen": -154.01751708984375, "logps/rejected": -225.97911071777344, "loss": 0.5552, "rewards/accuracies": 0.625, "rewards/chosen": -0.06929926574230194, "rewards/margins": 1.1516435146331787, "rewards/rejected": -1.2209429740905762, "step": 3824 }, { "epoch": 0.44, "learning_rate": 1.7027976120800654e-07, "logits/chosen": -3.283865213394165, "logits/rejected": -3.0429017543792725, "logps/chosen": -224.4901885986328, "logps/rejected": -125.26390838623047, "loss": 0.4067, "rewards/accuracies": 0.75, "rewards/chosen": -0.236540287733078, "rewards/margins": 1.3416614532470703, "rewards/rejected": -1.5782018899917603, "step": 3825 }, { "epoch": 0.44, "learning_rate": 1.7024464473838232e-07, "logits/chosen": -3.0854709148406982, "logits/rejected": -3.2770776748657227, "logps/chosen": -180.8231964111328, "logps/rejected": -200.98545837402344, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -0.06526744365692139, "rewards/margins": 2.7088186740875244, "rewards/rejected": -2.7740859985351562, "step": 3826 }, { "epoch": 0.44, "learning_rate": 1.7020952826875802e-07, "logits/chosen": -2.8152170181274414, "logits/rejected": -2.7437825202941895, "logps/chosen": -214.0752716064453, "logps/rejected": -276.72247314453125, "loss": 0.2614, "rewards/accuracies": 0.875, "rewards/chosen": 0.2248414158821106, "rewards/margins": 2.4227190017700195, "rewards/rejected": -2.1978774070739746, "step": 3827 }, { "epoch": 0.44, "learning_rate": 1.701744117991338e-07, "logits/chosen": -2.9112558364868164, "logits/rejected": -2.6681814193725586, "logps/chosen": -363.1322937011719, "logps/rejected": -233.21099853515625, "loss": 0.4897, "rewards/accuracies": 0.75, "rewards/chosen": -0.42571067810058594, "rewards/margins": 1.8968596458435059, "rewards/rejected": -2.322570323944092, "step": 3828 }, { "epoch": 0.44, "learning_rate": 1.7013929532950955e-07, "logits/chosen": -3.256643295288086, "logits/rejected": -2.9688034057617188, "logps/chosen": -126.83967590332031, "logps/rejected": -112.01837158203125, "loss": 0.6386, "rewards/accuracies": 0.5, "rewards/chosen": -0.3388954997062683, "rewards/margins": 0.8048197627067566, "rewards/rejected": -1.1437151432037354, "step": 3829 }, { "epoch": 0.44, "learning_rate": 1.7010417885988528e-07, "logits/chosen": -2.7688357830047607, "logits/rejected": -2.922804355621338, "logps/chosen": -228.9270477294922, "logps/rejected": -168.8568115234375, "loss": 0.3219, "rewards/accuracies": 0.75, "rewards/chosen": 0.5096137523651123, "rewards/margins": 2.0172204971313477, "rewards/rejected": -1.5076066255569458, "step": 3830 }, { "epoch": 0.44, "learning_rate": 1.7006906239026103e-07, "logits/chosen": -3.156186819076538, "logits/rejected": -3.1783320903778076, "logps/chosen": -167.80462646484375, "logps/rejected": -199.39089965820312, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": -0.21322020888328552, "rewards/margins": 1.2089229822158813, "rewards/rejected": -1.4221431016921997, "step": 3831 }, { "epoch": 0.44, "learning_rate": 1.7003394592063676e-07, "logits/chosen": -3.0764353275299072, "logits/rejected": -3.278012275695801, "logps/chosen": -307.578369140625, "logps/rejected": -280.45294189453125, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": 0.2797574996948242, "rewards/margins": 3.3237645626068115, "rewards/rejected": -3.0440073013305664, "step": 3832 }, { "epoch": 0.44, "learning_rate": 1.6999882945101251e-07, "logits/chosen": -3.2776849269866943, "logits/rejected": -3.784109115600586, "logps/chosen": -178.92681884765625, "logps/rejected": -177.3022003173828, "loss": 0.4214, "rewards/accuracies": 0.875, "rewards/chosen": 0.17100583016872406, "rewards/margins": 1.5860283374786377, "rewards/rejected": -1.415022373199463, "step": 3833 }, { "epoch": 0.44, "learning_rate": 1.6996371298138827e-07, "logits/chosen": -2.627293586730957, "logits/rejected": -2.6379499435424805, "logps/chosen": -228.23289489746094, "logps/rejected": -223.85821533203125, "loss": 0.2454, "rewards/accuracies": 0.875, "rewards/chosen": 0.19243809580802917, "rewards/margins": 1.8751585483551025, "rewards/rejected": -1.682720422744751, "step": 3834 }, { "epoch": 0.44, "learning_rate": 1.69928596511764e-07, "logits/chosen": -2.817966938018799, "logits/rejected": -3.147529363632202, "logps/chosen": -201.0763702392578, "logps/rejected": -159.84628295898438, "loss": 0.6622, "rewards/accuracies": 0.875, "rewards/chosen": -0.06648050248622894, "rewards/margins": 1.218529462814331, "rewards/rejected": -1.2850098609924316, "step": 3835 }, { "epoch": 0.44, "learning_rate": 1.6989348004213975e-07, "logits/chosen": -2.8921432495117188, "logits/rejected": -2.9727940559387207, "logps/chosen": -406.6605529785156, "logps/rejected": -414.479736328125, "loss": 0.4338, "rewards/accuracies": 0.75, "rewards/chosen": 0.41246211528778076, "rewards/margins": 1.2892175912857056, "rewards/rejected": -0.87675541639328, "step": 3836 }, { "epoch": 0.44, "learning_rate": 1.6985836357251553e-07, "logits/chosen": -2.975193500518799, "logits/rejected": -2.8656649589538574, "logps/chosen": -373.5258483886719, "logps/rejected": -229.15151977539062, "loss": 0.4598, "rewards/accuracies": 0.75, "rewards/chosen": -0.2931094169616699, "rewards/margins": 1.3585842847824097, "rewards/rejected": -1.6516937017440796, "step": 3837 }, { "epoch": 0.44, "learning_rate": 1.6982324710289123e-07, "logits/chosen": -2.90350604057312, "logits/rejected": -2.9982352256774902, "logps/chosen": -224.6698760986328, "logps/rejected": -192.93051147460938, "loss": 0.3323, "rewards/accuracies": 0.75, "rewards/chosen": 0.3522271513938904, "rewards/margins": 2.0066092014312744, "rewards/rejected": -1.6543821096420288, "step": 3838 }, { "epoch": 0.44, "learning_rate": 1.69788130633267e-07, "logits/chosen": -3.630878210067749, "logits/rejected": -3.901643753051758, "logps/chosen": -105.46337890625, "logps/rejected": -161.8000030517578, "loss": 0.2042, "rewards/accuracies": 1.0, "rewards/chosen": 0.4509018361568451, "rewards/margins": 1.8903559446334839, "rewards/rejected": -1.4394540786743164, "step": 3839 }, { "epoch": 0.44, "learning_rate": 1.6975301416364274e-07, "logits/chosen": -3.1081881523132324, "logits/rejected": -3.775651454925537, "logps/chosen": -216.453857421875, "logps/rejected": -443.6360778808594, "loss": 0.1411, "rewards/accuracies": 1.0, "rewards/chosen": 0.0730377659201622, "rewards/margins": 3.4959614276885986, "rewards/rejected": -3.4229235649108887, "step": 3840 }, { "epoch": 0.44, "learning_rate": 1.697178976940185e-07, "logits/chosen": -3.491589069366455, "logits/rejected": -3.3765645027160645, "logps/chosen": -180.77725219726562, "logps/rejected": -236.9386749267578, "loss": 0.3513, "rewards/accuracies": 0.875, "rewards/chosen": -0.21217359602451324, "rewards/margins": 1.8032169342041016, "rewards/rejected": -2.015390396118164, "step": 3841 }, { "epoch": 0.44, "learning_rate": 1.6968278122439425e-07, "logits/chosen": -3.5496749877929688, "logits/rejected": -3.6957125663757324, "logps/chosen": -237.6498260498047, "logps/rejected": -195.25393676757812, "loss": 0.411, "rewards/accuracies": 0.875, "rewards/chosen": 0.012890934944152832, "rewards/margins": 1.1408519744873047, "rewards/rejected": -1.1279609203338623, "step": 3842 }, { "epoch": 0.44, "learning_rate": 1.6964766475476997e-07, "logits/chosen": -3.059424877166748, "logits/rejected": -3.005371570587158, "logps/chosen": -257.50933837890625, "logps/rejected": -247.61671447753906, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": -0.2443234771490097, "rewards/margins": 2.3234567642211914, "rewards/rejected": -2.5677804946899414, "step": 3843 }, { "epoch": 0.44, "learning_rate": 1.6961254828514573e-07, "logits/chosen": -2.8302178382873535, "logits/rejected": -2.791989326477051, "logps/chosen": -394.8868408203125, "logps/rejected": -264.3414306640625, "loss": 0.4039, "rewards/accuracies": 0.875, "rewards/chosen": 0.48508021235466003, "rewards/margins": 2.160410165786743, "rewards/rejected": -1.6753300428390503, "step": 3844 }, { "epoch": 0.44, "learning_rate": 1.6957743181552148e-07, "logits/chosen": -2.563244342803955, "logits/rejected": -2.798534393310547, "logps/chosen": -514.3057861328125, "logps/rejected": -412.5401611328125, "loss": 0.3098, "rewards/accuracies": 0.875, "rewards/chosen": 0.3019489645957947, "rewards/margins": 1.951155662536621, "rewards/rejected": -1.6492067575454712, "step": 3845 }, { "epoch": 0.44, "learning_rate": 1.695423153458972e-07, "logits/chosen": -3.4920597076416016, "logits/rejected": -3.453874349594116, "logps/chosen": -240.20269775390625, "logps/rejected": -175.48529052734375, "loss": 0.439, "rewards/accuracies": 0.875, "rewards/chosen": 0.3508619964122772, "rewards/margins": 1.9807889461517334, "rewards/rejected": -1.6299270391464233, "step": 3846 }, { "epoch": 0.44, "learning_rate": 1.6950719887627296e-07, "logits/chosen": -3.157660961151123, "logits/rejected": -3.0012764930725098, "logps/chosen": -172.97145080566406, "logps/rejected": -186.09869384765625, "loss": 0.4586, "rewards/accuracies": 0.625, "rewards/chosen": 0.1452174186706543, "rewards/margins": 1.2179901599884033, "rewards/rejected": -1.0727728605270386, "step": 3847 }, { "epoch": 0.44, "learning_rate": 1.694720824066487e-07, "logits/chosen": -2.7757856845855713, "logits/rejected": -2.6125760078430176, "logps/chosen": -270.7652587890625, "logps/rejected": -194.69570922851562, "loss": 0.4561, "rewards/accuracies": 0.75, "rewards/chosen": -0.46264350414276123, "rewards/margins": 0.8285222053527832, "rewards/rejected": -1.2911657094955444, "step": 3848 }, { "epoch": 0.44, "learning_rate": 1.6943696593702444e-07, "logits/chosen": -2.099597692489624, "logits/rejected": -2.2245240211486816, "logps/chosen": -294.6419982910156, "logps/rejected": -258.6245422363281, "loss": 0.2524, "rewards/accuracies": 1.0, "rewards/chosen": 0.17028126120567322, "rewards/margins": 1.5689235925674438, "rewards/rejected": -1.3986423015594482, "step": 3849 }, { "epoch": 0.44, "learning_rate": 1.6940184946740022e-07, "logits/chosen": -3.6705031394958496, "logits/rejected": -3.3457181453704834, "logps/chosen": -257.61151123046875, "logps/rejected": -279.2810974121094, "loss": 0.1477, "rewards/accuracies": 0.875, "rewards/chosen": -0.026253875344991684, "rewards/margins": 3.354686737060547, "rewards/rejected": -3.3809406757354736, "step": 3850 }, { "epoch": 0.44, "learning_rate": 1.6936673299777595e-07, "logits/chosen": -3.6294422149658203, "logits/rejected": -3.568026542663574, "logps/chosen": -229.396728515625, "logps/rejected": -300.3705139160156, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.2553942799568176, "rewards/margins": 2.346906900405884, "rewards/rejected": -2.6023013591766357, "step": 3851 }, { "epoch": 0.44, "learning_rate": 1.693316165281517e-07, "logits/chosen": -2.568222761154175, "logits/rejected": -2.519191265106201, "logps/chosen": -364.44183349609375, "logps/rejected": -284.93359375, "loss": 0.6937, "rewards/accuracies": 0.375, "rewards/chosen": -0.7436318397521973, "rewards/margins": 0.12356360256671906, "rewards/rejected": -0.8671954274177551, "step": 3852 }, { "epoch": 0.44, "learning_rate": 1.6929650005852746e-07, "logits/chosen": -2.6728463172912598, "logits/rejected": -3.287393093109131, "logps/chosen": -207.31398010253906, "logps/rejected": -299.4951171875, "loss": 0.2872, "rewards/accuracies": 1.0, "rewards/chosen": 0.11166258156299591, "rewards/margins": 1.8056936264038086, "rewards/rejected": -1.6940311193466187, "step": 3853 }, { "epoch": 0.44, "learning_rate": 1.6926138358890319e-07, "logits/chosen": -3.4130241870880127, "logits/rejected": -3.267488956451416, "logps/chosen": -164.45053100585938, "logps/rejected": -152.47476196289062, "loss": 0.4926, "rewards/accuracies": 0.875, "rewards/chosen": -0.29558950662612915, "rewards/margins": 1.0614149570465088, "rewards/rejected": -1.3570042848587036, "step": 3854 }, { "epoch": 0.44, "learning_rate": 1.6922626711927894e-07, "logits/chosen": -2.845470428466797, "logits/rejected": -2.8128538131713867, "logps/chosen": -438.796630859375, "logps/rejected": -431.63970947265625, "loss": 0.2672, "rewards/accuracies": 0.875, "rewards/chosen": 0.06244960054755211, "rewards/margins": 2.0924651622772217, "rewards/rejected": -2.030015468597412, "step": 3855 }, { "epoch": 0.44, "learning_rate": 1.6919115064965467e-07, "logits/chosen": -3.7588729858398438, "logits/rejected": -3.5088040828704834, "logps/chosen": -326.962158203125, "logps/rejected": -241.14700317382812, "loss": 0.2859, "rewards/accuracies": 0.75, "rewards/chosen": 0.3627627491950989, "rewards/margins": 1.9224705696105957, "rewards/rejected": -1.5597076416015625, "step": 3856 }, { "epoch": 0.44, "learning_rate": 1.6915603418003042e-07, "logits/chosen": -4.22706413269043, "logits/rejected": -3.7585716247558594, "logps/chosen": -417.4139099121094, "logps/rejected": -280.3055419921875, "loss": 0.2542, "rewards/accuracies": 1.0, "rewards/chosen": 0.13051772117614746, "rewards/margins": 1.832986831665039, "rewards/rejected": -1.7024691104888916, "step": 3857 }, { "epoch": 0.44, "learning_rate": 1.6912091771040617e-07, "logits/chosen": -3.067542552947998, "logits/rejected": -3.0553903579711914, "logps/chosen": -269.66033935546875, "logps/rejected": -316.33294677734375, "loss": 0.6232, "rewards/accuracies": 0.5, "rewards/chosen": -0.5500885248184204, "rewards/margins": 1.5749268531799316, "rewards/rejected": -2.1250152587890625, "step": 3858 }, { "epoch": 0.44, "learning_rate": 1.690858012407819e-07, "logits/chosen": -2.6001038551330566, "logits/rejected": -2.756808280944824, "logps/chosen": -672.947998046875, "logps/rejected": -285.09002685546875, "loss": 0.3472, "rewards/accuracies": 0.875, "rewards/chosen": 0.5256681442260742, "rewards/margins": 1.3180114030838013, "rewards/rejected": -0.7923431992530823, "step": 3859 }, { "epoch": 0.44, "learning_rate": 1.6905068477115768e-07, "logits/chosen": -3.079257011413574, "logits/rejected": -3.0342655181884766, "logps/chosen": -92.11864471435547, "logps/rejected": -99.0540771484375, "loss": 0.5902, "rewards/accuracies": 0.625, "rewards/chosen": -0.11408732086420059, "rewards/margins": 0.44643402099609375, "rewards/rejected": -0.5605213642120361, "step": 3860 }, { "epoch": 0.45, "learning_rate": 1.6901556830153338e-07, "logits/chosen": -2.578151226043701, "logits/rejected": -2.549561023712158, "logps/chosen": -250.04710388183594, "logps/rejected": -312.9638366699219, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/chosen": 0.6282626390457153, "rewards/margins": 3.3821964263916016, "rewards/rejected": -2.753933906555176, "step": 3861 }, { "epoch": 0.45, "learning_rate": 1.6898045183190916e-07, "logits/chosen": -3.592589855194092, "logits/rejected": -3.6364269256591797, "logps/chosen": -145.2577667236328, "logps/rejected": -207.04232788085938, "loss": 0.2612, "rewards/accuracies": 1.0, "rewards/chosen": -0.3064040243625641, "rewards/margins": 1.6719707250595093, "rewards/rejected": -1.978374719619751, "step": 3862 }, { "epoch": 0.45, "learning_rate": 1.6894533536228492e-07, "logits/chosen": -2.8985047340393066, "logits/rejected": -2.9390482902526855, "logps/chosen": -288.90386962890625, "logps/rejected": -330.4521484375, "loss": 0.7616, "rewards/accuracies": 0.625, "rewards/chosen": -0.4438968598842621, "rewards/margins": 0.5341230630874634, "rewards/rejected": -0.9780200123786926, "step": 3863 }, { "epoch": 0.45, "learning_rate": 1.6891021889266065e-07, "logits/chosen": -2.296065330505371, "logits/rejected": -2.7112064361572266, "logps/chosen": -158.19680786132812, "logps/rejected": -139.00413513183594, "loss": 0.5802, "rewards/accuracies": 0.625, "rewards/chosen": -0.5655057430267334, "rewards/margins": 0.5407572984695435, "rewards/rejected": -1.1062629222869873, "step": 3864 }, { "epoch": 0.45, "learning_rate": 1.688751024230364e-07, "logits/chosen": -2.9475653171539307, "logits/rejected": -3.011859893798828, "logps/chosen": -296.60809326171875, "logps/rejected": -405.88623046875, "loss": 0.1359, "rewards/accuracies": 0.875, "rewards/chosen": -0.2793511152267456, "rewards/margins": 3.616455078125, "rewards/rejected": -3.895806312561035, "step": 3865 }, { "epoch": 0.45, "learning_rate": 1.6883998595341215e-07, "logits/chosen": -3.0542092323303223, "logits/rejected": -3.1475327014923096, "logps/chosen": -122.2643051147461, "logps/rejected": -130.79351806640625, "loss": 0.4921, "rewards/accuracies": 0.5, "rewards/chosen": 0.11073818057775497, "rewards/margins": 0.8513396978378296, "rewards/rejected": -0.7406014800071716, "step": 3866 }, { "epoch": 0.45, "learning_rate": 1.6880486948378788e-07, "logits/chosen": -3.76084303855896, "logits/rejected": -3.6724438667297363, "logps/chosen": -266.74346923828125, "logps/rejected": -307.8699645996094, "loss": 0.3991, "rewards/accuracies": 0.875, "rewards/chosen": -0.27069538831710815, "rewards/margins": 1.9395934343338013, "rewards/rejected": -2.2102887630462646, "step": 3867 }, { "epoch": 0.45, "learning_rate": 1.6876975301416363e-07, "logits/chosen": -3.8395590782165527, "logits/rejected": -3.994643449783325, "logps/chosen": -181.98068237304688, "logps/rejected": -325.96002197265625, "loss": 0.8344, "rewards/accuracies": 0.875, "rewards/chosen": 0.1679760068655014, "rewards/margins": 0.7577130794525146, "rewards/rejected": -0.589737057685852, "step": 3868 }, { "epoch": 0.45, "learning_rate": 1.6873463654453936e-07, "logits/chosen": -2.4880287647247314, "logits/rejected": -2.7334797382354736, "logps/chosen": -230.49827575683594, "logps/rejected": -330.4295654296875, "loss": 0.4923, "rewards/accuracies": 0.75, "rewards/chosen": -0.4239642024040222, "rewards/margins": 1.0541129112243652, "rewards/rejected": -1.4780771732330322, "step": 3869 }, { "epoch": 0.45, "learning_rate": 1.6869952007491512e-07, "logits/chosen": -3.1235508918762207, "logits/rejected": -3.05199933052063, "logps/chosen": -284.97320556640625, "logps/rejected": -336.5337219238281, "loss": 0.4886, "rewards/accuracies": 0.875, "rewards/chosen": -0.15582121908664703, "rewards/margins": 1.5183848142623901, "rewards/rejected": -1.674206018447876, "step": 3870 }, { "epoch": 0.45, "learning_rate": 1.686644036052909e-07, "logits/chosen": -2.8333353996276855, "logits/rejected": -2.749884605407715, "logps/chosen": -173.58392333984375, "logps/rejected": -189.92547607421875, "loss": 0.4994, "rewards/accuracies": 0.625, "rewards/chosen": -0.776189386844635, "rewards/margins": 1.169601321220398, "rewards/rejected": -1.9457906484603882, "step": 3871 }, { "epoch": 0.45, "learning_rate": 1.686292871356666e-07, "logits/chosen": -2.709059238433838, "logits/rejected": -2.7070860862731934, "logps/chosen": -246.8770751953125, "logps/rejected": -321.349853515625, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": 0.616919994354248, "rewards/margins": 2.304532766342163, "rewards/rejected": -1.687612771987915, "step": 3872 }, { "epoch": 0.45, "learning_rate": 1.6859417066604238e-07, "logits/chosen": -3.0224175453186035, "logits/rejected": -2.831676483154297, "logps/chosen": -204.62814331054688, "logps/rejected": -156.0582275390625, "loss": 0.399, "rewards/accuracies": 0.875, "rewards/chosen": 0.12646663188934326, "rewards/margins": 1.147215485572815, "rewards/rejected": -1.0207488536834717, "step": 3873 }, { "epoch": 0.45, "learning_rate": 1.6855905419641813e-07, "logits/chosen": -3.748628854751587, "logits/rejected": -4.007974147796631, "logps/chosen": -160.3075408935547, "logps/rejected": -169.16030883789062, "loss": 0.295, "rewards/accuracies": 0.875, "rewards/chosen": 0.2671356797218323, "rewards/margins": 2.5194919109344482, "rewards/rejected": -2.2523562908172607, "step": 3874 }, { "epoch": 0.45, "learning_rate": 1.6852393772679386e-07, "logits/chosen": -3.393664598464966, "logits/rejected": -3.315054178237915, "logps/chosen": -265.0625915527344, "logps/rejected": -269.63140869140625, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": 0.18085436522960663, "rewards/margins": 3.3327484130859375, "rewards/rejected": -3.1518940925598145, "step": 3875 }, { "epoch": 0.45, "learning_rate": 1.684888212571696e-07, "logits/chosen": -2.6979892253875732, "logits/rejected": -2.4428157806396484, "logps/chosen": -415.4053955078125, "logps/rejected": -311.0868225097656, "loss": 0.2981, "rewards/accuracies": 0.875, "rewards/chosen": 0.17243170738220215, "rewards/margins": 2.718362808227539, "rewards/rejected": -2.545931100845337, "step": 3876 }, { "epoch": 0.45, "learning_rate": 1.6845370478754534e-07, "logits/chosen": -3.24533748626709, "logits/rejected": -3.1194043159484863, "logps/chosen": -291.28363037109375, "logps/rejected": -500.8436279296875, "loss": 0.6475, "rewards/accuracies": 0.5, "rewards/chosen": -0.15035519003868103, "rewards/margins": 0.9167739152908325, "rewards/rejected": -1.067129135131836, "step": 3877 }, { "epoch": 0.45, "learning_rate": 1.684185883179211e-07, "logits/chosen": -3.4611239433288574, "logits/rejected": -3.0433244705200195, "logps/chosen": -315.01666259765625, "logps/rejected": -246.11155700683594, "loss": 0.6005, "rewards/accuracies": 0.75, "rewards/chosen": -0.2760579586029053, "rewards/margins": 0.6002941131591797, "rewards/rejected": -0.8763521909713745, "step": 3878 }, { "epoch": 0.45, "learning_rate": 1.6838347184829685e-07, "logits/chosen": -2.6113336086273193, "logits/rejected": -2.670722484588623, "logps/chosen": -262.0046691894531, "logps/rejected": -223.1996307373047, "loss": 0.562, "rewards/accuracies": 0.625, "rewards/chosen": -0.37164202332496643, "rewards/margins": 0.8300586342811584, "rewards/rejected": -1.2017008066177368, "step": 3879 }, { "epoch": 0.45, "learning_rate": 1.6834835537867257e-07, "logits/chosen": -3.342900276184082, "logits/rejected": -3.673882484436035, "logps/chosen": -169.36630249023438, "logps/rejected": -151.51882934570312, "loss": 0.4471, "rewards/accuracies": 0.75, "rewards/chosen": -0.19415529072284698, "rewards/margins": 0.9486415386199951, "rewards/rejected": -1.1427967548370361, "step": 3880 }, { "epoch": 0.45, "learning_rate": 1.6831323890904833e-07, "logits/chosen": -2.229797840118408, "logits/rejected": -2.537360668182373, "logps/chosen": -399.0658264160156, "logps/rejected": -352.37896728515625, "loss": 0.1668, "rewards/accuracies": 1.0, "rewards/chosen": 0.3610866069793701, "rewards/margins": 2.1801562309265137, "rewards/rejected": -1.819069743156433, "step": 3881 }, { "epoch": 0.45, "learning_rate": 1.682781224394241e-07, "logits/chosen": -3.6444664001464844, "logits/rejected": -3.378690242767334, "logps/chosen": -347.6923828125, "logps/rejected": -311.1700439453125, "loss": 0.4543, "rewards/accuracies": 0.75, "rewards/chosen": -0.23901455104351044, "rewards/margins": 1.8777564764022827, "rewards/rejected": -2.1167712211608887, "step": 3882 }, { "epoch": 0.45, "learning_rate": 1.682430059697998e-07, "logits/chosen": -2.93082857131958, "logits/rejected": -2.772521495819092, "logps/chosen": -240.93133544921875, "logps/rejected": -211.7318878173828, "loss": 0.4091, "rewards/accuracies": 0.875, "rewards/chosen": -0.5353559851646423, "rewards/margins": 1.1363091468811035, "rewards/rejected": -1.6716651916503906, "step": 3883 }, { "epoch": 0.45, "learning_rate": 1.682078895001756e-07, "logits/chosen": -2.586240291595459, "logits/rejected": -2.949476957321167, "logps/chosen": -193.0887908935547, "logps/rejected": -221.45816040039062, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": -0.1109359860420227, "rewards/margins": 2.0748164653778076, "rewards/rejected": -2.1857523918151855, "step": 3884 }, { "epoch": 0.45, "learning_rate": 1.6817277303055132e-07, "logits/chosen": -3.2292394638061523, "logits/rejected": -3.2614524364471436, "logps/chosen": -588.3258056640625, "logps/rejected": -325.7349853515625, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": -0.2567040026187897, "rewards/margins": 1.66847825050354, "rewards/rejected": -1.9251822233200073, "step": 3885 }, { "epoch": 0.45, "learning_rate": 1.6813765656092707e-07, "logits/chosen": -3.5593667030334473, "logits/rejected": -3.1227147579193115, "logps/chosen": -325.40478515625, "logps/rejected": -273.6921691894531, "loss": 0.205, "rewards/accuracies": 1.0, "rewards/chosen": 0.10359519720077515, "rewards/margins": 2.1111083030700684, "rewards/rejected": -2.0075132846832275, "step": 3886 }, { "epoch": 0.45, "learning_rate": 1.6810254009130283e-07, "logits/chosen": -2.8436498641967773, "logits/rejected": -3.0519158840179443, "logps/chosen": -146.1928253173828, "logps/rejected": -228.0513916015625, "loss": 0.491, "rewards/accuracies": 0.875, "rewards/chosen": -0.721136212348938, "rewards/margins": 1.5719950199127197, "rewards/rejected": -2.2931313514709473, "step": 3887 }, { "epoch": 0.45, "learning_rate": 1.6806742362167855e-07, "logits/chosen": -3.2631518840789795, "logits/rejected": -3.4329028129577637, "logps/chosen": -312.8166198730469, "logps/rejected": -350.3734130859375, "loss": 0.2319, "rewards/accuracies": 0.875, "rewards/chosen": 0.586063802242279, "rewards/margins": 2.6200506687164307, "rewards/rejected": -2.033986806869507, "step": 3888 }, { "epoch": 0.45, "learning_rate": 1.680323071520543e-07, "logits/chosen": -3.1916112899780273, "logits/rejected": -3.44156551361084, "logps/chosen": -384.52398681640625, "logps/rejected": -255.17474365234375, "loss": 0.6373, "rewards/accuracies": 0.75, "rewards/chosen": 0.17399150133132935, "rewards/margins": 1.3684548139572144, "rewards/rejected": -1.1944632530212402, "step": 3889 }, { "epoch": 0.45, "learning_rate": 1.6799719068243006e-07, "logits/chosen": -3.2748184204101562, "logits/rejected": -3.3138275146484375, "logps/chosen": -188.09335327148438, "logps/rejected": -154.2055206298828, "loss": 0.3981, "rewards/accuracies": 0.875, "rewards/chosen": 0.017880067229270935, "rewards/margins": 1.4722833633422852, "rewards/rejected": -1.4544031620025635, "step": 3890 }, { "epoch": 0.45, "learning_rate": 1.679620742128058e-07, "logits/chosen": -3.231192111968994, "logits/rejected": -3.3155622482299805, "logps/chosen": -136.6150360107422, "logps/rejected": -130.3331298828125, "loss": 0.5179, "rewards/accuracies": 0.625, "rewards/chosen": -0.5456979870796204, "rewards/margins": 0.5372752547264099, "rewards/rejected": -1.0829732418060303, "step": 3891 }, { "epoch": 0.45, "learning_rate": 1.6792695774318154e-07, "logits/chosen": -3.229435920715332, "logits/rejected": -3.3020009994506836, "logps/chosen": -58.47271728515625, "logps/rejected": -134.1791229248047, "loss": 0.5248, "rewards/accuracies": 0.5, "rewards/chosen": -0.023945041000843048, "rewards/margins": 0.8842027187347412, "rewards/rejected": -0.9081476926803589, "step": 3892 }, { "epoch": 0.45, "learning_rate": 1.6789184127355727e-07, "logits/chosen": -3.7879343032836914, "logits/rejected": -3.8460826873779297, "logps/chosen": -201.2899932861328, "logps/rejected": -136.4763946533203, "loss": 0.416, "rewards/accuracies": 0.75, "rewards/chosen": -0.08611539006233215, "rewards/margins": 1.3985284566879272, "rewards/rejected": -1.4846436977386475, "step": 3893 }, { "epoch": 0.45, "learning_rate": 1.6785672480393305e-07, "logits/chosen": -3.2066304683685303, "logits/rejected": -2.967660903930664, "logps/chosen": -335.3586120605469, "logps/rejected": -251.68028259277344, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": 0.28362923860549927, "rewards/margins": 1.8555078506469727, "rewards/rejected": -1.5718786716461182, "step": 3894 }, { "epoch": 0.45, "learning_rate": 1.678216083343088e-07, "logits/chosen": -3.0254433155059814, "logits/rejected": -2.952657699584961, "logps/chosen": -310.55615234375, "logps/rejected": -209.0472412109375, "loss": 0.321, "rewards/accuracies": 1.0, "rewards/chosen": 0.11981107294559479, "rewards/margins": 1.2418019771575928, "rewards/rejected": -1.1219907999038696, "step": 3895 }, { "epoch": 0.45, "learning_rate": 1.6778649186468453e-07, "logits/chosen": -3.44158673286438, "logits/rejected": -3.160054922103882, "logps/chosen": -183.380615234375, "logps/rejected": -212.17001342773438, "loss": 0.1761, "rewards/accuracies": 0.875, "rewards/chosen": -0.22255712747573853, "rewards/margins": 2.851637363433838, "rewards/rejected": -3.0741944313049316, "step": 3896 }, { "epoch": 0.45, "learning_rate": 1.6775137539506028e-07, "logits/chosen": -2.9350032806396484, "logits/rejected": -3.064073085784912, "logps/chosen": -348.854736328125, "logps/rejected": -291.3142395019531, "loss": 0.7842, "rewards/accuracies": 0.625, "rewards/chosen": -0.7101092338562012, "rewards/margins": 0.6592597961425781, "rewards/rejected": -1.3693690299987793, "step": 3897 }, { "epoch": 0.45, "learning_rate": 1.6771625892543604e-07, "logits/chosen": -3.0275933742523193, "logits/rejected": -2.914254665374756, "logps/chosen": -424.3076171875, "logps/rejected": -496.337158203125, "loss": 1.2405, "rewards/accuracies": 0.5, "rewards/chosen": 0.0633334070444107, "rewards/margins": -0.43494293093681335, "rewards/rejected": 0.49827635288238525, "step": 3898 }, { "epoch": 0.45, "learning_rate": 1.6768114245581177e-07, "logits/chosen": -2.8038079738616943, "logits/rejected": -2.9028995037078857, "logps/chosen": -177.37120056152344, "logps/rejected": -194.94635009765625, "loss": 0.3249, "rewards/accuracies": 0.875, "rewards/chosen": -0.14306241273880005, "rewards/margins": 2.216038227081299, "rewards/rejected": -2.359100341796875, "step": 3899 }, { "epoch": 0.45, "learning_rate": 1.6764602598618752e-07, "logits/chosen": -3.2047643661499023, "logits/rejected": -3.1078362464904785, "logps/chosen": -280.8101806640625, "logps/rejected": -271.61334228515625, "loss": 0.4999, "rewards/accuracies": 0.625, "rewards/chosen": 0.056729406118392944, "rewards/margins": 1.1859720945358276, "rewards/rejected": -1.1292426586151123, "step": 3900 }, { "epoch": 0.45, "learning_rate": 1.6761090951656325e-07, "logits/chosen": -3.1043453216552734, "logits/rejected": -3.3430185317993164, "logps/chosen": -103.40493774414062, "logps/rejected": -162.62950134277344, "loss": 0.2771, "rewards/accuracies": 0.875, "rewards/chosen": 0.16949604451656342, "rewards/margins": 2.740267515182495, "rewards/rejected": -2.5707716941833496, "step": 3901 }, { "epoch": 0.45, "learning_rate": 1.67575793046939e-07, "logits/chosen": -2.845283031463623, "logits/rejected": -2.888688325881958, "logps/chosen": -381.3897705078125, "logps/rejected": -272.5105895996094, "loss": 0.3588, "rewards/accuracies": 0.875, "rewards/chosen": -0.34148579835891724, "rewards/margins": 1.7198735475540161, "rewards/rejected": -2.061359167098999, "step": 3902 }, { "epoch": 0.45, "learning_rate": 1.6754067657731475e-07, "logits/chosen": -3.615793228149414, "logits/rejected": -3.4860551357269287, "logps/chosen": -252.29299926757812, "logps/rejected": -320.9788818359375, "loss": 0.3775, "rewards/accuracies": 0.75, "rewards/chosen": -0.19172416627407074, "rewards/margins": 1.9543359279632568, "rewards/rejected": -2.146059989929199, "step": 3903 }, { "epoch": 0.45, "learning_rate": 1.6750556010769048e-07, "logits/chosen": -2.899372100830078, "logits/rejected": -2.77384614944458, "logps/chosen": -220.14102172851562, "logps/rejected": -256.169677734375, "loss": 0.8327, "rewards/accuracies": 0.875, "rewards/chosen": 0.07914219796657562, "rewards/margins": 0.9766309857368469, "rewards/rejected": -0.8974887132644653, "step": 3904 }, { "epoch": 0.45, "learning_rate": 1.6747044363806626e-07, "logits/chosen": -4.083298206329346, "logits/rejected": -3.6591484546661377, "logps/chosen": -311.29107666015625, "logps/rejected": -143.20938110351562, "loss": 0.7459, "rewards/accuracies": 0.625, "rewards/chosen": -0.8008547425270081, "rewards/margins": 0.6739466190338135, "rewards/rejected": -1.4748014211654663, "step": 3905 }, { "epoch": 0.45, "learning_rate": 1.6743532716844196e-07, "logits/chosen": -3.1859443187713623, "logits/rejected": -3.48614239692688, "logps/chosen": -294.55657958984375, "logps/rejected": -483.15631103515625, "loss": 0.7159, "rewards/accuracies": 0.625, "rewards/chosen": -0.4383498430252075, "rewards/margins": 0.2848326563835144, "rewards/rejected": -0.7231824398040771, "step": 3906 }, { "epoch": 0.45, "learning_rate": 1.6740021069881774e-07, "logits/chosen": -3.3163130283355713, "logits/rejected": -3.3488364219665527, "logps/chosen": -263.12335205078125, "logps/rejected": -300.5025634765625, "loss": 0.4234, "rewards/accuracies": 0.875, "rewards/chosen": -0.7805500030517578, "rewards/margins": 1.705501675605774, "rewards/rejected": -2.486051559448242, "step": 3907 }, { "epoch": 0.45, "learning_rate": 1.673650942291935e-07, "logits/chosen": -2.5484869480133057, "logits/rejected": -2.5810928344726562, "logps/chosen": -219.32505798339844, "logps/rejected": -256.1408996582031, "loss": 0.3058, "rewards/accuracies": 0.875, "rewards/chosen": -0.08273406326770782, "rewards/margins": 2.1331024169921875, "rewards/rejected": -2.215836524963379, "step": 3908 }, { "epoch": 0.45, "learning_rate": 1.6732997775956922e-07, "logits/chosen": -3.905609607696533, "logits/rejected": -3.7289156913757324, "logps/chosen": -245.06663513183594, "logps/rejected": -197.56292724609375, "loss": 0.4261, "rewards/accuracies": 0.75, "rewards/chosen": 0.04866520315408707, "rewards/margins": 1.2163619995117188, "rewards/rejected": -1.1676968336105347, "step": 3909 }, { "epoch": 0.45, "learning_rate": 1.6729486128994498e-07, "logits/chosen": -4.019491672515869, "logits/rejected": -3.8270466327667236, "logps/chosen": -243.4026641845703, "logps/rejected": -196.89645385742188, "loss": 0.4203, "rewards/accuracies": 0.875, "rewards/chosen": -0.042392242699861526, "rewards/margins": 1.3091219663619995, "rewards/rejected": -1.3515143394470215, "step": 3910 }, { "epoch": 0.45, "learning_rate": 1.6725974482032073e-07, "logits/chosen": -2.494875431060791, "logits/rejected": -2.664867639541626, "logps/chosen": -690.2308349609375, "logps/rejected": -518.0101318359375, "loss": 0.6143, "rewards/accuracies": 0.625, "rewards/chosen": 0.18021927773952484, "rewards/margins": 1.3277764320373535, "rewards/rejected": -1.147557258605957, "step": 3911 }, { "epoch": 0.45, "learning_rate": 1.6722462835069646e-07, "logits/chosen": -2.5157837867736816, "logits/rejected": -2.8236405849456787, "logps/chosen": -202.26364135742188, "logps/rejected": -242.93899536132812, "loss": 0.405, "rewards/accuracies": 0.75, "rewards/chosen": -0.08699209243059158, "rewards/margins": 1.2268575429916382, "rewards/rejected": -1.3138495683670044, "step": 3912 }, { "epoch": 0.45, "learning_rate": 1.6718951188107221e-07, "logits/chosen": -3.0516393184661865, "logits/rejected": -3.5533287525177, "logps/chosen": -200.78036499023438, "logps/rejected": -245.96046447753906, "loss": 0.2833, "rewards/accuracies": 1.0, "rewards/chosen": -0.24726897478103638, "rewards/margins": 2.300065755844116, "rewards/rejected": -2.547334909439087, "step": 3913 }, { "epoch": 0.45, "learning_rate": 1.6715439541144794e-07, "logits/chosen": -2.1686410903930664, "logits/rejected": -2.3424363136291504, "logps/chosen": -281.7262878417969, "logps/rejected": -240.1151123046875, "loss": 0.4651, "rewards/accuracies": 0.75, "rewards/chosen": -0.06388819217681885, "rewards/margins": 1.3189105987548828, "rewards/rejected": -1.382798671722412, "step": 3914 }, { "epoch": 0.45, "learning_rate": 1.671192789418237e-07, "logits/chosen": -3.195685863494873, "logits/rejected": -2.894683837890625, "logps/chosen": -277.0578918457031, "logps/rejected": -192.98788452148438, "loss": 0.5809, "rewards/accuracies": 0.75, "rewards/chosen": -0.5015321969985962, "rewards/margins": 0.7417024374008179, "rewards/rejected": -1.243234634399414, "step": 3915 }, { "epoch": 0.45, "learning_rate": 1.6708416247219948e-07, "logits/chosen": -2.842817783355713, "logits/rejected": -2.7137913703918457, "logps/chosen": -323.8084716796875, "logps/rejected": -425.027587890625, "loss": 0.2629, "rewards/accuracies": 0.875, "rewards/chosen": 0.0037068650126457214, "rewards/margins": 2.248180389404297, "rewards/rejected": -2.244473695755005, "step": 3916 }, { "epoch": 0.45, "learning_rate": 1.6704904600257518e-07, "logits/chosen": -2.7317750453948975, "logits/rejected": -2.9263811111450195, "logps/chosen": -387.1406555175781, "logps/rejected": -326.827880859375, "loss": 0.2967, "rewards/accuracies": 1.0, "rewards/chosen": 0.5465068817138672, "rewards/margins": 1.5278393030166626, "rewards/rejected": -0.9813324809074402, "step": 3917 }, { "epoch": 0.45, "learning_rate": 1.6701392953295096e-07, "logits/chosen": -3.2112770080566406, "logits/rejected": -3.1896276473999023, "logps/chosen": -203.7079620361328, "logps/rejected": -225.31387329101562, "loss": 0.4817, "rewards/accuracies": 0.75, "rewards/chosen": -0.6842728853225708, "rewards/margins": 0.9469429850578308, "rewards/rejected": -1.6312158107757568, "step": 3918 }, { "epoch": 0.45, "learning_rate": 1.669788130633267e-07, "logits/chosen": -3.175036907196045, "logits/rejected": -3.582202196121216, "logps/chosen": -174.6392822265625, "logps/rejected": -273.2793273925781, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": 0.007361330091953278, "rewards/margins": 3.4077954292297363, "rewards/rejected": -3.4004340171813965, "step": 3919 }, { "epoch": 0.45, "learning_rate": 1.6694369659370244e-07, "logits/chosen": -2.888916015625, "logits/rejected": -2.753185272216797, "logps/chosen": -143.0266571044922, "logps/rejected": -130.4777069091797, "loss": 0.4428, "rewards/accuracies": 0.625, "rewards/chosen": -0.18790292739868164, "rewards/margins": 0.9244746565818787, "rewards/rejected": -1.112377643585205, "step": 3920 }, { "epoch": 0.45, "learning_rate": 1.669085801240782e-07, "logits/chosen": -3.3482506275177, "logits/rejected": -3.5022435188293457, "logps/chosen": -181.4210205078125, "logps/rejected": -242.97943115234375, "loss": 0.2461, "rewards/accuracies": 0.875, "rewards/chosen": 0.1498022973537445, "rewards/margins": 1.8597441911697388, "rewards/rejected": -1.7099418640136719, "step": 3921 }, { "epoch": 0.45, "learning_rate": 1.6687346365445392e-07, "logits/chosen": -2.97554874420166, "logits/rejected": -3.2131731510162354, "logps/chosen": -319.1679992675781, "logps/rejected": -334.4341125488281, "loss": 0.6327, "rewards/accuracies": 0.625, "rewards/chosen": -0.3946351110935211, "rewards/margins": 1.6252377033233643, "rewards/rejected": -2.0198729038238525, "step": 3922 }, { "epoch": 0.45, "learning_rate": 1.6683834718482967e-07, "logits/chosen": -2.7938454151153564, "logits/rejected": -2.641062021255493, "logps/chosen": -157.29443359375, "logps/rejected": -192.3769989013672, "loss": 0.5457, "rewards/accuracies": 0.625, "rewards/chosen": -0.17077118158340454, "rewards/margins": 1.4029216766357422, "rewards/rejected": -1.5736926794052124, "step": 3923 }, { "epoch": 0.45, "learning_rate": 1.6680323071520543e-07, "logits/chosen": -2.5717058181762695, "logits/rejected": -2.475672483444214, "logps/chosen": -217.38052368164062, "logps/rejected": -259.3857116699219, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": 0.34016233682632446, "rewards/margins": 2.8364720344543457, "rewards/rejected": -2.496309995651245, "step": 3924 }, { "epoch": 0.45, "learning_rate": 1.6676811424558115e-07, "logits/chosen": -3.6335225105285645, "logits/rejected": -4.148991584777832, "logps/chosen": -205.16958618164062, "logps/rejected": -219.7207794189453, "loss": 0.5761, "rewards/accuracies": 0.5, "rewards/chosen": -0.6447200179100037, "rewards/margins": 0.5442485809326172, "rewards/rejected": -1.1889686584472656, "step": 3925 }, { "epoch": 0.45, "learning_rate": 1.667329977759569e-07, "logits/chosen": -2.8978145122528076, "logits/rejected": -3.1460652351379395, "logps/chosen": -190.1858367919922, "logps/rejected": -209.722900390625, "loss": 0.436, "rewards/accuracies": 0.625, "rewards/chosen": -0.42125946283340454, "rewards/margins": 2.1215572357177734, "rewards/rejected": -2.5428171157836914, "step": 3926 }, { "epoch": 0.45, "learning_rate": 1.666978813063327e-07, "logits/chosen": -2.9563565254211426, "logits/rejected": -3.01013445854187, "logps/chosen": -294.2347717285156, "logps/rejected": -262.5138854980469, "loss": 0.3021, "rewards/accuracies": 1.0, "rewards/chosen": -0.2665388286113739, "rewards/margins": 1.7277708053588867, "rewards/rejected": -1.994309663772583, "step": 3927 }, { "epoch": 0.45, "learning_rate": 1.6666276483670842e-07, "logits/chosen": -2.7254207134246826, "logits/rejected": -2.772742986679077, "logps/chosen": -312.483154296875, "logps/rejected": -288.9871826171875, "loss": 0.3681, "rewards/accuracies": 0.75, "rewards/chosen": -0.15272855758666992, "rewards/margins": 2.0317296981811523, "rewards/rejected": -2.1844582557678223, "step": 3928 }, { "epoch": 0.45, "learning_rate": 1.6662764836708417e-07, "logits/chosen": -3.1562881469726562, "logits/rejected": -2.9972753524780273, "logps/chosen": -246.06527709960938, "logps/rejected": -275.90057373046875, "loss": 0.3011, "rewards/accuracies": 0.875, "rewards/chosen": 0.2586939334869385, "rewards/margins": 1.503697156906128, "rewards/rejected": -1.2450032234191895, "step": 3929 }, { "epoch": 0.45, "learning_rate": 1.665925318974599e-07, "logits/chosen": -2.771390914916992, "logits/rejected": -2.81821346282959, "logps/chosen": -344.42205810546875, "logps/rejected": -197.8970947265625, "loss": 0.3047, "rewards/accuracies": 1.0, "rewards/chosen": 0.540795087814331, "rewards/margins": 1.5866599082946777, "rewards/rejected": -1.0458648204803467, "step": 3930 }, { "epoch": 0.45, "learning_rate": 1.6655741542783565e-07, "logits/chosen": -2.9273390769958496, "logits/rejected": -2.957430839538574, "logps/chosen": -203.44021606445312, "logps/rejected": -254.94305419921875, "loss": 0.588, "rewards/accuracies": 0.75, "rewards/chosen": -0.8859602212905884, "rewards/margins": 0.831865668296814, "rewards/rejected": -1.7178257703781128, "step": 3931 }, { "epoch": 0.45, "learning_rate": 1.665222989582114e-07, "logits/chosen": -3.131474494934082, "logits/rejected": -3.11191463470459, "logps/chosen": -226.9496612548828, "logps/rejected": -330.57122802734375, "loss": 0.4189, "rewards/accuracies": 0.875, "rewards/chosen": -0.3152734041213989, "rewards/margins": 1.7751468420028687, "rewards/rejected": -2.0904202461242676, "step": 3932 }, { "epoch": 0.45, "learning_rate": 1.6648718248858713e-07, "logits/chosen": -3.1802546977996826, "logits/rejected": -3.330568790435791, "logps/chosen": -196.8290252685547, "logps/rejected": -195.74844360351562, "loss": 0.369, "rewards/accuracies": 0.875, "rewards/chosen": -0.31033021211624146, "rewards/margins": 1.2503724098205566, "rewards/rejected": -1.5607025623321533, "step": 3933 }, { "epoch": 0.45, "learning_rate": 1.6645206601896289e-07, "logits/chosen": -3.4656312465667725, "logits/rejected": -3.6490557193756104, "logps/chosen": -51.048744201660156, "logps/rejected": -150.63478088378906, "loss": 0.2075, "rewards/accuracies": 0.875, "rewards/chosen": 0.5096320509910583, "rewards/margins": 2.744006872177124, "rewards/rejected": -2.234375, "step": 3934 }, { "epoch": 0.45, "learning_rate": 1.6641694954933864e-07, "logits/chosen": -3.1361091136932373, "logits/rejected": -3.2533063888549805, "logps/chosen": -229.19491577148438, "logps/rejected": -198.26638793945312, "loss": 0.5645, "rewards/accuracies": 0.75, "rewards/chosen": -0.7726982831954956, "rewards/margins": 0.8672773838043213, "rewards/rejected": -1.6399755477905273, "step": 3935 }, { "epoch": 0.45, "learning_rate": 1.6638183307971437e-07, "logits/chosen": -3.426068067550659, "logits/rejected": -3.300900459289551, "logps/chosen": -243.7493896484375, "logps/rejected": -140.19775390625, "loss": 0.9715, "rewards/accuracies": 0.625, "rewards/chosen": -0.14539562165737152, "rewards/margins": 0.06436687707901001, "rewards/rejected": -0.20976249873638153, "step": 3936 }, { "epoch": 0.45, "learning_rate": 1.6634671661009012e-07, "logits/chosen": -2.2964227199554443, "logits/rejected": -2.6459438800811768, "logps/chosen": -329.76141357421875, "logps/rejected": -274.14044189453125, "loss": 0.516, "rewards/accuracies": 0.625, "rewards/chosen": 0.08467893302440643, "rewards/margins": 0.8177881836891174, "rewards/rejected": -0.7331092357635498, "step": 3937 }, { "epoch": 0.45, "learning_rate": 1.6631160014046585e-07, "logits/chosen": -2.7670536041259766, "logits/rejected": -2.8458662033081055, "logps/chosen": -293.47601318359375, "logps/rejected": -248.64146423339844, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -0.4590204358100891, "rewards/margins": 2.6637744903564453, "rewards/rejected": -3.1227951049804688, "step": 3938 }, { "epoch": 0.45, "learning_rate": 1.6627648367084163e-07, "logits/chosen": -3.2104363441467285, "logits/rejected": -3.38616943359375, "logps/chosen": -193.8213653564453, "logps/rejected": -208.594970703125, "loss": 0.232, "rewards/accuracies": 0.875, "rewards/chosen": -0.024422720074653625, "rewards/margins": 2.3275668621063232, "rewards/rejected": -2.351989507675171, "step": 3939 }, { "epoch": 0.45, "learning_rate": 1.6624136720121738e-07, "logits/chosen": -2.6745595932006836, "logits/rejected": -2.9450459480285645, "logps/chosen": -506.9779357910156, "logps/rejected": -217.281494140625, "loss": 0.2326, "rewards/accuracies": 1.0, "rewards/chosen": 0.2863805592060089, "rewards/margins": 2.1853699684143066, "rewards/rejected": -1.8989893198013306, "step": 3940 }, { "epoch": 0.45, "learning_rate": 1.662062507315931e-07, "logits/chosen": -3.8072128295898438, "logits/rejected": -3.6781773567199707, "logps/chosen": -271.0982971191406, "logps/rejected": -268.2368469238281, "loss": 0.4746, "rewards/accuracies": 0.75, "rewards/chosen": -0.314240038394928, "rewards/margins": 1.5845011472702026, "rewards/rejected": -1.8987411260604858, "step": 3941 }, { "epoch": 0.45, "learning_rate": 1.6617113426196886e-07, "logits/chosen": -3.32926869392395, "logits/rejected": -3.3597798347473145, "logps/chosen": -393.288818359375, "logps/rejected": -378.1005859375, "loss": 0.1798, "rewards/accuracies": 1.0, "rewards/chosen": 0.06532678753137589, "rewards/margins": 3.001054286956787, "rewards/rejected": -2.935727596282959, "step": 3942 }, { "epoch": 0.45, "learning_rate": 1.6613601779234462e-07, "logits/chosen": -3.183940887451172, "logits/rejected": -3.24076509475708, "logps/chosen": -261.2845458984375, "logps/rejected": -234.1049346923828, "loss": 0.138, "rewards/accuracies": 1.0, "rewards/chosen": 0.1516067534685135, "rewards/margins": 2.7614076137542725, "rewards/rejected": -2.6098008155822754, "step": 3943 }, { "epoch": 0.45, "learning_rate": 1.6610090132272034e-07, "logits/chosen": -3.7316646575927734, "logits/rejected": -3.6260194778442383, "logps/chosen": -311.6120910644531, "logps/rejected": -317.6443786621094, "loss": 0.3553, "rewards/accuracies": 0.75, "rewards/chosen": -0.18183842301368713, "rewards/margins": 1.6836862564086914, "rewards/rejected": -1.8655246496200562, "step": 3944 }, { "epoch": 0.45, "learning_rate": 1.660657848530961e-07, "logits/chosen": -2.9080231189727783, "logits/rejected": -2.743009090423584, "logps/chosen": -424.9231872558594, "logps/rejected": -264.76416015625, "loss": 0.3021, "rewards/accuracies": 0.75, "rewards/chosen": 0.24496810138225555, "rewards/margins": 2.410780906677246, "rewards/rejected": -2.1658129692077637, "step": 3945 }, { "epoch": 0.45, "learning_rate": 1.6603066838347183e-07, "logits/chosen": -3.3465728759765625, "logits/rejected": -3.4729700088500977, "logps/chosen": -207.16937255859375, "logps/rejected": -276.08050537109375, "loss": 0.183, "rewards/accuracies": 0.875, "rewards/chosen": 0.33600008487701416, "rewards/margins": 2.6388473510742188, "rewards/rejected": -2.302847385406494, "step": 3946 }, { "epoch": 0.46, "learning_rate": 1.6599555191384758e-07, "logits/chosen": -3.1818604469299316, "logits/rejected": -3.1019415855407715, "logps/chosen": -206.70872497558594, "logps/rejected": -292.5819091796875, "loss": 0.8129, "rewards/accuracies": 0.625, "rewards/chosen": -0.7445281147956848, "rewards/margins": 0.5674898624420166, "rewards/rejected": -1.3120179176330566, "step": 3947 }, { "epoch": 0.46, "learning_rate": 1.6596043544422333e-07, "logits/chosen": -3.1033477783203125, "logits/rejected": -3.2285282611846924, "logps/chosen": -287.02252197265625, "logps/rejected": -478.1990966796875, "loss": 0.2269, "rewards/accuracies": 0.875, "rewards/chosen": 0.09701438993215561, "rewards/margins": 3.983635425567627, "rewards/rejected": -3.8866209983825684, "step": 3948 }, { "epoch": 0.46, "learning_rate": 1.6592531897459906e-07, "logits/chosen": -3.4116649627685547, "logits/rejected": -3.4333150386810303, "logps/chosen": -275.67254638671875, "logps/rejected": -274.20623779296875, "loss": 0.2816, "rewards/accuracies": 1.0, "rewards/chosen": -0.5063928961753845, "rewards/margins": 1.4183909893035889, "rewards/rejected": -1.9247838258743286, "step": 3949 }, { "epoch": 0.46, "learning_rate": 1.6589020250497484e-07, "logits/chosen": -2.7551186084747314, "logits/rejected": -2.8596112728118896, "logps/chosen": -231.7318878173828, "logps/rejected": -301.86419677734375, "loss": 0.5625, "rewards/accuracies": 0.875, "rewards/chosen": -0.2832072377204895, "rewards/margins": 1.9950810670852661, "rewards/rejected": -2.2782883644104004, "step": 3950 }, { "epoch": 0.46, "learning_rate": 1.6585508603535054e-07, "logits/chosen": -2.949077606201172, "logits/rejected": -3.0316786766052246, "logps/chosen": -139.98046875, "logps/rejected": -231.80307006835938, "loss": 0.2196, "rewards/accuracies": 0.875, "rewards/chosen": 0.21762631833553314, "rewards/margins": 2.1158483028411865, "rewards/rejected": -1.8982219696044922, "step": 3951 }, { "epoch": 0.46, "learning_rate": 1.6581996956572632e-07, "logits/chosen": -2.6996636390686035, "logits/rejected": -2.743530750274658, "logps/chosen": -224.20693969726562, "logps/rejected": -224.3370361328125, "loss": 0.3443, "rewards/accuracies": 0.875, "rewards/chosen": -0.21939529478549957, "rewards/margins": 2.306821823120117, "rewards/rejected": -2.526216983795166, "step": 3952 }, { "epoch": 0.46, "learning_rate": 1.6578485309610208e-07, "logits/chosen": -2.6949586868286133, "logits/rejected": -2.7072267532348633, "logps/chosen": -450.14263916015625, "logps/rejected": -334.2158508300781, "loss": 0.2455, "rewards/accuracies": 0.875, "rewards/chosen": -0.07065839320421219, "rewards/margins": 2.2746214866638184, "rewards/rejected": -2.3452799320220947, "step": 3953 }, { "epoch": 0.46, "learning_rate": 1.657497366264778e-07, "logits/chosen": -3.669567823410034, "logits/rejected": -3.545783281326294, "logps/chosen": -267.88055419921875, "logps/rejected": -266.5813903808594, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": 0.16398391127586365, "rewards/margins": 3.2232327461242676, "rewards/rejected": -3.059248924255371, "step": 3954 }, { "epoch": 0.46, "learning_rate": 1.6571462015685356e-07, "logits/chosen": -4.099387168884277, "logits/rejected": -3.9782285690307617, "logps/chosen": -140.45372009277344, "logps/rejected": -200.38003540039062, "loss": 0.5413, "rewards/accuracies": 0.625, "rewards/chosen": -0.6678177714347839, "rewards/margins": 0.8936976194381714, "rewards/rejected": -1.5615154504776, "step": 3955 }, { "epoch": 0.46, "learning_rate": 1.656795036872293e-07, "logits/chosen": -2.6751601696014404, "logits/rejected": -2.9875009059906006, "logps/chosen": -278.93646240234375, "logps/rejected": -434.8519287109375, "loss": 0.1888, "rewards/accuracies": 0.875, "rewards/chosen": 0.4723946750164032, "rewards/margins": 3.45566987991333, "rewards/rejected": -2.9832751750946045, "step": 3956 }, { "epoch": 0.46, "learning_rate": 1.6564438721760504e-07, "logits/chosen": -3.4537341594696045, "logits/rejected": -3.411539077758789, "logps/chosen": -205.83761596679688, "logps/rejected": -227.76454162597656, "loss": 0.2794, "rewards/accuracies": 1.0, "rewards/chosen": -0.07443726062774658, "rewards/margins": 1.5221247673034668, "rewards/rejected": -1.596562147140503, "step": 3957 }, { "epoch": 0.46, "learning_rate": 1.656092707479808e-07, "logits/chosen": -3.77352237701416, "logits/rejected": -3.8291430473327637, "logps/chosen": -322.41375732421875, "logps/rejected": -254.01405334472656, "loss": 0.2852, "rewards/accuracies": 0.875, "rewards/chosen": 0.32932227849960327, "rewards/margins": 1.9812369346618652, "rewards/rejected": -1.6519148349761963, "step": 3958 }, { "epoch": 0.46, "learning_rate": 1.6557415427835652e-07, "logits/chosen": -3.1541318893432617, "logits/rejected": -3.266061305999756, "logps/chosen": -281.408935546875, "logps/rejected": -266.9397888183594, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": 0.02969842404127121, "rewards/margins": 1.0571486949920654, "rewards/rejected": -1.0274502038955688, "step": 3959 }, { "epoch": 0.46, "learning_rate": 1.6553903780873227e-07, "logits/chosen": -2.750767230987549, "logits/rejected": -2.77349853515625, "logps/chosen": -278.79571533203125, "logps/rejected": -245.61578369140625, "loss": 0.5922, "rewards/accuracies": 0.625, "rewards/chosen": -0.7730966210365295, "rewards/margins": 0.6253774166107178, "rewards/rejected": -1.3984739780426025, "step": 3960 }, { "epoch": 0.46, "learning_rate": 1.6550392133910805e-07, "logits/chosen": -3.9635000228881836, "logits/rejected": -3.691056966781616, "logps/chosen": -311.1716003417969, "logps/rejected": -346.4088134765625, "loss": 0.4196, "rewards/accuracies": 0.625, "rewards/chosen": -0.7732243537902832, "rewards/margins": 1.802823543548584, "rewards/rejected": -2.576047897338867, "step": 3961 }, { "epoch": 0.46, "learning_rate": 1.6546880486948378e-07, "logits/chosen": -2.7927889823913574, "logits/rejected": -2.7317700386047363, "logps/chosen": -192.64907836914062, "logps/rejected": -186.52369689941406, "loss": 0.5924, "rewards/accuracies": 0.5, "rewards/chosen": -0.018361926078796387, "rewards/margins": 0.5788378715515137, "rewards/rejected": -0.5971997976303101, "step": 3962 }, { "epoch": 0.46, "learning_rate": 1.6543368839985954e-07, "logits/chosen": -3.101942777633667, "logits/rejected": -3.1254310607910156, "logps/chosen": -197.59812927246094, "logps/rejected": -168.5692901611328, "loss": 0.2309, "rewards/accuracies": 1.0, "rewards/chosen": 0.441495418548584, "rewards/margins": 1.745103120803833, "rewards/rejected": -1.3036075830459595, "step": 3963 }, { "epoch": 0.46, "learning_rate": 1.653985719302353e-07, "logits/chosen": -3.07716703414917, "logits/rejected": -3.0328919887542725, "logps/chosen": -348.2048645019531, "logps/rejected": -369.5650329589844, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -0.09208270907402039, "rewards/margins": 1.2332953214645386, "rewards/rejected": -1.3253778219223022, "step": 3964 }, { "epoch": 0.46, "learning_rate": 1.6536345546061102e-07, "logits/chosen": -2.4997291564941406, "logits/rejected": -2.5756750106811523, "logps/chosen": -211.7244110107422, "logps/rejected": -258.54364013671875, "loss": 0.7362, "rewards/accuracies": 0.875, "rewards/chosen": -0.4678249657154083, "rewards/margins": 1.054385781288147, "rewards/rejected": -1.522210717201233, "step": 3965 }, { "epoch": 0.46, "learning_rate": 1.6532833899098677e-07, "logits/chosen": -3.635465145111084, "logits/rejected": -3.5831503868103027, "logps/chosen": -319.3228759765625, "logps/rejected": -218.89492797851562, "loss": 0.447, "rewards/accuracies": 0.875, "rewards/chosen": -0.048495396971702576, "rewards/margins": 0.8125790357589722, "rewards/rejected": -0.8610744476318359, "step": 3966 }, { "epoch": 0.46, "learning_rate": 1.652932225213625e-07, "logits/chosen": -3.5552186965942383, "logits/rejected": -3.435089349746704, "logps/chosen": -111.84662628173828, "logps/rejected": -101.10833740234375, "loss": 0.6296, "rewards/accuracies": 0.75, "rewards/chosen": -0.5400054454803467, "rewards/margins": 0.3165719211101532, "rewards/rejected": -0.8565773367881775, "step": 3967 }, { "epoch": 0.46, "learning_rate": 1.6525810605173825e-07, "logits/chosen": -3.2507565021514893, "logits/rejected": -3.3229596614837646, "logps/chosen": -208.93109130859375, "logps/rejected": -178.7959747314453, "loss": 0.2974, "rewards/accuracies": 1.0, "rewards/chosen": -0.28170067071914673, "rewards/margins": 1.6594507694244385, "rewards/rejected": -1.94115149974823, "step": 3968 }, { "epoch": 0.46, "learning_rate": 1.65222989582114e-07, "logits/chosen": -2.9264767169952393, "logits/rejected": -3.0075089931488037, "logps/chosen": -259.5301513671875, "logps/rejected": -296.82489013671875, "loss": 0.2094, "rewards/accuracies": 1.0, "rewards/chosen": 0.1802763193845749, "rewards/margins": 2.2352750301361084, "rewards/rejected": -2.0549988746643066, "step": 3969 }, { "epoch": 0.46, "learning_rate": 1.6518787311248973e-07, "logits/chosen": -3.341752529144287, "logits/rejected": -3.175489902496338, "logps/chosen": -286.57830810546875, "logps/rejected": -220.0558319091797, "loss": 0.3078, "rewards/accuracies": 0.875, "rewards/chosen": 0.1467384397983551, "rewards/margins": 1.9107708930969238, "rewards/rejected": -1.7640326023101807, "step": 3970 }, { "epoch": 0.46, "learning_rate": 1.651527566428655e-07, "logits/chosen": -2.144599199295044, "logits/rejected": -2.144249677658081, "logps/chosen": -337.7557678222656, "logps/rejected": -230.0604705810547, "loss": 0.3223, "rewards/accuracies": 0.875, "rewards/chosen": 0.09539368748664856, "rewards/margins": 1.2287732362747192, "rewards/rejected": -1.133379578590393, "step": 3971 }, { "epoch": 0.46, "learning_rate": 1.6511764017324127e-07, "logits/chosen": -3.231484889984131, "logits/rejected": -3.3901374340057373, "logps/chosen": -157.3760986328125, "logps/rejected": -285.7887268066406, "loss": 0.3465, "rewards/accuracies": 0.75, "rewards/chosen": 0.16599106788635254, "rewards/margins": 2.4666965007781982, "rewards/rejected": -2.3007051944732666, "step": 3972 }, { "epoch": 0.46, "learning_rate": 1.65082523703617e-07, "logits/chosen": -3.4712109565734863, "logits/rejected": -3.370635509490967, "logps/chosen": -304.71417236328125, "logps/rejected": -256.9755859375, "loss": 0.2937, "rewards/accuracies": 1.0, "rewards/chosen": -0.16690418124198914, "rewards/margins": 1.6194047927856445, "rewards/rejected": -1.786309003829956, "step": 3973 }, { "epoch": 0.46, "learning_rate": 1.6504740723399275e-07, "logits/chosen": -3.4244792461395264, "logits/rejected": -3.6659579277038574, "logps/chosen": -214.91839599609375, "logps/rejected": -210.9064178466797, "loss": 0.1606, "rewards/accuracies": 1.0, "rewards/chosen": 0.8289140462875366, "rewards/margins": 2.5794179439544678, "rewards/rejected": -1.7505037784576416, "step": 3974 }, { "epoch": 0.46, "learning_rate": 1.6501229076436848e-07, "logits/chosen": -3.188371181488037, "logits/rejected": -2.8804514408111572, "logps/chosen": -191.48306274414062, "logps/rejected": -278.719970703125, "loss": 0.4044, "rewards/accuracies": 0.75, "rewards/chosen": -0.4965165853500366, "rewards/margins": 2.8400635719299316, "rewards/rejected": -3.336580276489258, "step": 3975 }, { "epoch": 0.46, "learning_rate": 1.6497717429474423e-07, "logits/chosen": -3.4767565727233887, "logits/rejected": -3.351149082183838, "logps/chosen": -307.1471252441406, "logps/rejected": -216.97792053222656, "loss": 0.4328, "rewards/accuracies": 0.75, "rewards/chosen": 0.40834617614746094, "rewards/margins": 1.675047516822815, "rewards/rejected": -1.266701340675354, "step": 3976 }, { "epoch": 0.46, "learning_rate": 1.6494205782511998e-07, "logits/chosen": -3.3121650218963623, "logits/rejected": -2.923543930053711, "logps/chosen": -239.4761962890625, "logps/rejected": -174.41180419921875, "loss": 0.6314, "rewards/accuracies": 0.625, "rewards/chosen": -0.20744732022285461, "rewards/margins": 0.5713068246841431, "rewards/rejected": -0.7787541151046753, "step": 3977 }, { "epoch": 0.46, "learning_rate": 1.649069413554957e-07, "logits/chosen": -3.0934929847717285, "logits/rejected": -2.7812631130218506, "logps/chosen": -181.4929656982422, "logps/rejected": -164.94595336914062, "loss": 0.3004, "rewards/accuracies": 0.75, "rewards/chosen": 0.8139621615409851, "rewards/margins": 2.257063627243042, "rewards/rejected": -1.443101406097412, "step": 3978 }, { "epoch": 0.46, "learning_rate": 1.6487182488587146e-07, "logits/chosen": -2.771048069000244, "logits/rejected": -3.094297409057617, "logps/chosen": -177.0226287841797, "logps/rejected": -219.43560791015625, "loss": 0.5734, "rewards/accuracies": 0.625, "rewards/chosen": 0.057240039110183716, "rewards/margins": 1.981013298034668, "rewards/rejected": -1.923773169517517, "step": 3979 }, { "epoch": 0.46, "learning_rate": 1.6483670841624722e-07, "logits/chosen": -3.1800315380096436, "logits/rejected": -3.2251572608947754, "logps/chosen": -173.72579956054688, "logps/rejected": -198.10275268554688, "loss": 0.3235, "rewards/accuracies": 0.875, "rewards/chosen": -0.266027569770813, "rewards/margins": 1.5047733783721924, "rewards/rejected": -1.770801067352295, "step": 3980 }, { "epoch": 0.46, "learning_rate": 1.6480159194662295e-07, "logits/chosen": -3.216588020324707, "logits/rejected": -3.4371676445007324, "logps/chosen": -217.40249633789062, "logps/rejected": -344.0405578613281, "loss": 0.108, "rewards/accuracies": 1.0, "rewards/chosen": 0.4530790448188782, "rewards/margins": 3.0385255813598633, "rewards/rejected": -2.585446834564209, "step": 3981 }, { "epoch": 0.46, "learning_rate": 1.647664754769987e-07, "logits/chosen": -2.758603572845459, "logits/rejected": -2.7158920764923096, "logps/chosen": -297.5655822753906, "logps/rejected": -320.78558349609375, "loss": 0.7365, "rewards/accuracies": 0.75, "rewards/chosen": -0.6531367897987366, "rewards/margins": 0.6878114342689514, "rewards/rejected": -1.3409483432769775, "step": 3982 }, { "epoch": 0.46, "learning_rate": 1.6473135900737443e-07, "logits/chosen": -4.092775344848633, "logits/rejected": -3.9806699752807617, "logps/chosen": -318.9555358886719, "logps/rejected": -260.61962890625, "loss": 0.7359, "rewards/accuracies": 0.5, "rewards/chosen": -0.40656930208206177, "rewards/margins": 0.9104679822921753, "rewards/rejected": -1.3170373439788818, "step": 3983 }, { "epoch": 0.46, "learning_rate": 1.646962425377502e-07, "logits/chosen": -2.8695454597473145, "logits/rejected": -2.8708181381225586, "logps/chosen": -304.20294189453125, "logps/rejected": -250.50601196289062, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": -0.0858922079205513, "rewards/margins": 2.2498929500579834, "rewards/rejected": -2.335785150527954, "step": 3984 }, { "epoch": 0.46, "learning_rate": 1.6466112606812596e-07, "logits/chosen": -3.3683032989501953, "logits/rejected": -3.6590805053710938, "logps/chosen": -173.3211669921875, "logps/rejected": -184.30824279785156, "loss": 0.2949, "rewards/accuracies": 0.875, "rewards/chosen": 0.17366328835487366, "rewards/margins": 2.2997193336486816, "rewards/rejected": -2.12605619430542, "step": 3985 }, { "epoch": 0.46, "learning_rate": 1.646260095985017e-07, "logits/chosen": -3.6463472843170166, "logits/rejected": -3.603259563446045, "logps/chosen": -230.11764526367188, "logps/rejected": -266.04296875, "loss": 0.6006, "rewards/accuracies": 0.625, "rewards/chosen": -0.46812936663627625, "rewards/margins": 0.8776636123657227, "rewards/rejected": -1.3457930088043213, "step": 3986 }, { "epoch": 0.46, "learning_rate": 1.6459089312887744e-07, "logits/chosen": -3.108025312423706, "logits/rejected": -3.0504095554351807, "logps/chosen": -181.22821044921875, "logps/rejected": -254.70335388183594, "loss": 0.6199, "rewards/accuracies": 0.5, "rewards/chosen": -0.6955282092094421, "rewards/margins": 0.3873995542526245, "rewards/rejected": -1.0829278230667114, "step": 3987 }, { "epoch": 0.46, "learning_rate": 1.645557766592532e-07, "logits/chosen": -3.343568801879883, "logits/rejected": -2.9868083000183105, "logps/chosen": -434.24591064453125, "logps/rejected": -246.72369384765625, "loss": 0.7949, "rewards/accuracies": 0.5, "rewards/chosen": -0.42785656452178955, "rewards/margins": 1.0305135250091553, "rewards/rejected": -1.4583702087402344, "step": 3988 }, { "epoch": 0.46, "learning_rate": 1.6452066018962892e-07, "logits/chosen": -3.6544039249420166, "logits/rejected": -3.333561897277832, "logps/chosen": -161.2752227783203, "logps/rejected": -171.18716430664062, "loss": 1.1869, "rewards/accuracies": 0.5, "rewards/chosen": -1.1563000679016113, "rewards/margins": 0.6723873019218445, "rewards/rejected": -1.8286874294281006, "step": 3989 }, { "epoch": 0.46, "learning_rate": 1.6448554372000468e-07, "logits/chosen": -3.1885628700256348, "logits/rejected": -2.833173990249634, "logps/chosen": -301.1690368652344, "logps/rejected": -215.670654296875, "loss": 0.2884, "rewards/accuracies": 0.75, "rewards/chosen": -0.03793298453092575, "rewards/margins": 2.3116183280944824, "rewards/rejected": -2.349551200866699, "step": 3990 }, { "epoch": 0.46, "learning_rate": 1.644504272503804e-07, "logits/chosen": -2.951505661010742, "logits/rejected": -3.2268502712249756, "logps/chosen": -204.24673461914062, "logps/rejected": -200.89932250976562, "loss": 0.5415, "rewards/accuracies": 0.625, "rewards/chosen": -0.6377841830253601, "rewards/margins": 1.9683786630630493, "rewards/rejected": -2.6061627864837646, "step": 3991 }, { "epoch": 0.46, "learning_rate": 1.6441531078075616e-07, "logits/chosen": -2.80319881439209, "logits/rejected": -2.6608431339263916, "logps/chosen": -185.8607635498047, "logps/rejected": -131.388671875, "loss": 0.6989, "rewards/accuracies": 0.625, "rewards/chosen": -0.36704012751579285, "rewards/margins": 0.2181515395641327, "rewards/rejected": -0.5851916074752808, "step": 3992 }, { "epoch": 0.46, "learning_rate": 1.643801943111319e-07, "logits/chosen": -3.5328030586242676, "logits/rejected": -3.614912509918213, "logps/chosen": -220.2752227783203, "logps/rejected": -266.94830322265625, "loss": 0.2912, "rewards/accuracies": 0.875, "rewards/chosen": 0.13142554461956024, "rewards/margins": 3.371856451034546, "rewards/rejected": -3.2404305934906006, "step": 3993 }, { "epoch": 0.46, "learning_rate": 1.6434507784150764e-07, "logits/chosen": -3.4521870613098145, "logits/rejected": -3.2855241298675537, "logps/chosen": -257.023193359375, "logps/rejected": -307.99029541015625, "loss": 0.2527, "rewards/accuracies": 0.875, "rewards/chosen": -0.6928210258483887, "rewards/margins": 2.042175769805908, "rewards/rejected": -2.734996795654297, "step": 3994 }, { "epoch": 0.46, "learning_rate": 1.6430996137188342e-07, "logits/chosen": -3.950422763824463, "logits/rejected": -3.8057146072387695, "logps/chosen": -236.10211181640625, "logps/rejected": -177.81661987304688, "loss": 0.1995, "rewards/accuracies": 1.0, "rewards/chosen": 0.5446525812149048, "rewards/margins": 2.14439058303833, "rewards/rejected": -1.5997378826141357, "step": 3995 }, { "epoch": 0.46, "learning_rate": 1.6427484490225917e-07, "logits/chosen": -2.7173614501953125, "logits/rejected": -2.8052632808685303, "logps/chosen": -337.5051574707031, "logps/rejected": -261.2828369140625, "loss": 0.3001, "rewards/accuracies": 1.0, "rewards/chosen": 0.46268516778945923, "rewards/margins": 1.6465518474578857, "rewards/rejected": -1.1838668584823608, "step": 3996 }, { "epoch": 0.46, "learning_rate": 1.642397284326349e-07, "logits/chosen": -3.3771941661834717, "logits/rejected": -3.3279247283935547, "logps/chosen": -316.04620361328125, "logps/rejected": -219.026611328125, "loss": 0.6833, "rewards/accuracies": 0.75, "rewards/chosen": -0.4462580978870392, "rewards/margins": 1.2310659885406494, "rewards/rejected": -1.6773239374160767, "step": 3997 }, { "epoch": 0.46, "learning_rate": 1.6420461196301066e-07, "logits/chosen": -2.562575578689575, "logits/rejected": -2.5766289234161377, "logps/chosen": -279.7907409667969, "logps/rejected": -180.93743896484375, "loss": 0.4346, "rewards/accuracies": 0.875, "rewards/chosen": -0.0762377679347992, "rewards/margins": 0.733461856842041, "rewards/rejected": -0.8096995949745178, "step": 3998 }, { "epoch": 0.46, "learning_rate": 1.6416949549338638e-07, "logits/chosen": -2.6048173904418945, "logits/rejected": -2.518115997314453, "logps/chosen": -362.49322509765625, "logps/rejected": -299.4237060546875, "loss": 0.3127, "rewards/accuracies": 0.875, "rewards/chosen": -0.29383695125579834, "rewards/margins": 2.069251298904419, "rewards/rejected": -2.3630881309509277, "step": 3999 }, { "epoch": 0.46, "learning_rate": 1.6413437902376214e-07, "logits/chosen": -2.8831188678741455, "logits/rejected": -2.9819562435150146, "logps/chosen": -194.85986328125, "logps/rejected": -283.9088134765625, "loss": 0.2861, "rewards/accuracies": 0.875, "rewards/chosen": -0.10785654187202454, "rewards/margins": 2.621767044067383, "rewards/rejected": -2.729623556137085, "step": 4000 }, { "epoch": 0.46, "eval_logits/chosen": -2.8433773517608643, "eval_logits/rejected": -2.8090808391571045, "eval_logps/chosen": -293.8716125488281, "eval_logps/rejected": -236.62954711914062, "eval_loss": 0.43787261843681335, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.018334832042455673, "eval_rewards/margins": 1.2470276355743408, "eval_rewards/rejected": -1.2286927700042725, "eval_runtime": 32.6016, "eval_samples_per_second": 2.147, "eval_steps_per_second": 1.074, "step": 4000 }, { "epoch": 0.46, "learning_rate": 1.640992625541379e-07, "logits/chosen": -3.097788095474243, "logits/rejected": -3.0029335021972656, "logps/chosen": -158.92428588867188, "logps/rejected": -166.89224243164062, "loss": 0.7172, "rewards/accuracies": 0.625, "rewards/chosen": -0.6463521718978882, "rewards/margins": 0.6549421548843384, "rewards/rejected": -1.3012943267822266, "step": 4001 }, { "epoch": 0.46, "learning_rate": 1.6406414608451362e-07, "logits/chosen": -3.387054681777954, "logits/rejected": -3.2726821899414062, "logps/chosen": -165.92349243164062, "logps/rejected": -280.93206787109375, "loss": 0.4087, "rewards/accuracies": 0.75, "rewards/chosen": -0.06472450494766235, "rewards/margins": 2.3058180809020996, "rewards/rejected": -2.370542526245117, "step": 4002 }, { "epoch": 0.46, "learning_rate": 1.6402902961488937e-07, "logits/chosen": -2.514674186706543, "logits/rejected": -2.4830241203308105, "logps/chosen": -321.7777404785156, "logps/rejected": -171.14181518554688, "loss": 0.4612, "rewards/accuracies": 0.875, "rewards/chosen": 0.3805154263973236, "rewards/margins": 1.555202603340149, "rewards/rejected": -1.174687147140503, "step": 4003 }, { "epoch": 0.46, "learning_rate": 1.639939131452651e-07, "logits/chosen": -3.5103750228881836, "logits/rejected": -3.2903897762298584, "logps/chosen": -212.14007568359375, "logps/rejected": -239.06271362304688, "loss": 0.8264, "rewards/accuracies": 0.625, "rewards/chosen": -0.5355724692344666, "rewards/margins": 0.48404234647750854, "rewards/rejected": -1.019614815711975, "step": 4004 }, { "epoch": 0.46, "learning_rate": 1.6395879667564085e-07, "logits/chosen": -3.399909496307373, "logits/rejected": -3.4066264629364014, "logps/chosen": -218.37957763671875, "logps/rejected": -162.23890686035156, "loss": 0.4699, "rewards/accuracies": 0.75, "rewards/chosen": -0.05159464478492737, "rewards/margins": 1.2562288045883179, "rewards/rejected": -1.3078234195709229, "step": 4005 }, { "epoch": 0.46, "learning_rate": 1.6392368020601663e-07, "logits/chosen": -3.539332389831543, "logits/rejected": -3.36002779006958, "logps/chosen": -244.35662841796875, "logps/rejected": -318.65423583984375, "loss": 0.3805, "rewards/accuracies": 0.75, "rewards/chosen": -0.20495277643203735, "rewards/margins": 3.080638885498047, "rewards/rejected": -3.2855913639068604, "step": 4006 }, { "epoch": 0.46, "learning_rate": 1.6388856373639236e-07, "logits/chosen": -2.781803607940674, "logits/rejected": -3.110149383544922, "logps/chosen": -295.13690185546875, "logps/rejected": -344.74249267578125, "loss": 0.4069, "rewards/accuracies": 0.75, "rewards/chosen": -0.07358483970165253, "rewards/margins": 2.223085880279541, "rewards/rejected": -2.296671152114868, "step": 4007 }, { "epoch": 0.46, "learning_rate": 1.6385344726676812e-07, "logits/chosen": -3.62471342086792, "logits/rejected": -3.3946077823638916, "logps/chosen": -260.4302978515625, "logps/rejected": -314.1594543457031, "loss": 0.3844, "rewards/accuracies": 0.875, "rewards/chosen": -0.350037157535553, "rewards/margins": 2.478698968887329, "rewards/rejected": -2.8287360668182373, "step": 4008 }, { "epoch": 0.46, "learning_rate": 1.6381833079714387e-07, "logits/chosen": -3.5677404403686523, "logits/rejected": -3.5073530673980713, "logps/chosen": -243.21038818359375, "logps/rejected": -238.63772583007812, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": 0.5886500477790833, "rewards/margins": 1.4157829284667969, "rewards/rejected": -0.8271329402923584, "step": 4009 }, { "epoch": 0.46, "learning_rate": 1.637832143275196e-07, "logits/chosen": -3.1958200931549072, "logits/rejected": -3.2570877075195312, "logps/chosen": -390.84637451171875, "logps/rejected": -211.732177734375, "loss": 0.4516, "rewards/accuracies": 0.875, "rewards/chosen": 0.023188352584838867, "rewards/margins": 0.9268955588340759, "rewards/rejected": -0.9037072658538818, "step": 4010 }, { "epoch": 0.46, "learning_rate": 1.6374809785789535e-07, "logits/chosen": -2.602372884750366, "logits/rejected": -2.593405246734619, "logps/chosen": -410.18829345703125, "logps/rejected": -237.3779296875, "loss": 0.5197, "rewards/accuracies": 0.875, "rewards/chosen": 0.20458859205245972, "rewards/margins": 0.612922191619873, "rewards/rejected": -0.40833356976509094, "step": 4011 }, { "epoch": 0.46, "learning_rate": 1.6371298138827108e-07, "logits/chosen": -3.1995320320129395, "logits/rejected": -2.9835469722747803, "logps/chosen": -342.9864501953125, "logps/rejected": -271.7644958496094, "loss": 0.2711, "rewards/accuracies": 1.0, "rewards/chosen": -0.02700500562787056, "rewards/margins": 1.590925931930542, "rewards/rejected": -1.6179308891296387, "step": 4012 }, { "epoch": 0.46, "learning_rate": 1.6367786491864683e-07, "logits/chosen": -2.8886349201202393, "logits/rejected": -2.9970672130584717, "logps/chosen": -189.3954620361328, "logps/rejected": -183.78915405273438, "loss": 0.4653, "rewards/accuracies": 0.75, "rewards/chosen": -0.13795128464698792, "rewards/margins": 1.4225164651870728, "rewards/rejected": -1.5604678392410278, "step": 4013 }, { "epoch": 0.46, "learning_rate": 1.6364274844902259e-07, "logits/chosen": -2.8558554649353027, "logits/rejected": -2.925271511077881, "logps/chosen": -279.20123291015625, "logps/rejected": -246.9000244140625, "loss": 0.7978, "rewards/accuracies": 0.5, "rewards/chosen": -0.030086562037467957, "rewards/margins": 0.22714731097221375, "rewards/rejected": -0.2572338581085205, "step": 4014 }, { "epoch": 0.46, "learning_rate": 1.636076319793983e-07, "logits/chosen": -2.9062204360961914, "logits/rejected": -2.839794635772705, "logps/chosen": -97.71676635742188, "logps/rejected": -196.38473510742188, "loss": 0.3137, "rewards/accuracies": 0.875, "rewards/chosen": -0.6069939136505127, "rewards/margins": 1.4259965419769287, "rewards/rejected": -2.0329904556274414, "step": 4015 }, { "epoch": 0.46, "learning_rate": 1.6357251550977407e-07, "logits/chosen": -4.005174160003662, "logits/rejected": -3.9457809925079346, "logps/chosen": -328.5836486816406, "logps/rejected": -204.25997924804688, "loss": 0.4288, "rewards/accuracies": 0.875, "rewards/chosen": 0.14630062878131866, "rewards/margins": 1.3929662704467773, "rewards/rejected": -1.246665596961975, "step": 4016 }, { "epoch": 0.46, "learning_rate": 1.6353739904014985e-07, "logits/chosen": -3.495882987976074, "logits/rejected": -2.9803388118743896, "logps/chosen": -348.90936279296875, "logps/rejected": -303.5547180175781, "loss": 0.4247, "rewards/accuracies": 0.75, "rewards/chosen": -0.420043021440506, "rewards/margins": 1.2908498048782349, "rewards/rejected": -1.710892677307129, "step": 4017 }, { "epoch": 0.46, "learning_rate": 1.6350228257052557e-07, "logits/chosen": -2.914503812789917, "logits/rejected": -2.8801662921905518, "logps/chosen": -309.33160400390625, "logps/rejected": -240.04901123046875, "loss": 0.3722, "rewards/accuracies": 0.875, "rewards/chosen": 0.14050158858299255, "rewards/margins": 1.5887449979782104, "rewards/rejected": -1.448243498802185, "step": 4018 }, { "epoch": 0.46, "learning_rate": 1.6346716610090133e-07, "logits/chosen": -3.072740077972412, "logits/rejected": -3.12315034866333, "logps/chosen": -139.6576385498047, "logps/rejected": -275.0268859863281, "loss": 0.387, "rewards/accuracies": 0.875, "rewards/chosen": -0.15434737503528595, "rewards/margins": 2.192469596862793, "rewards/rejected": -2.3468170166015625, "step": 4019 }, { "epoch": 0.46, "learning_rate": 1.6343204963127706e-07, "logits/chosen": -3.4853806495666504, "logits/rejected": -3.199787139892578, "logps/chosen": -251.83929443359375, "logps/rejected": -286.9573974609375, "loss": 0.4732, "rewards/accuracies": 0.625, "rewards/chosen": 0.0514964684844017, "rewards/margins": 0.9908259510993958, "rewards/rejected": -0.9393295049667358, "step": 4020 }, { "epoch": 0.46, "learning_rate": 1.633969331616528e-07, "logits/chosen": -3.445040225982666, "logits/rejected": -3.291023015975952, "logps/chosen": -379.9154357910156, "logps/rejected": -162.46421813964844, "loss": 0.6259, "rewards/accuracies": 0.625, "rewards/chosen": -0.44096535444259644, "rewards/margins": 0.8879681825637817, "rewards/rejected": -1.3289337158203125, "step": 4021 }, { "epoch": 0.46, "learning_rate": 1.6336181669202856e-07, "logits/chosen": -3.3272764682769775, "logits/rejected": -2.935596466064453, "logps/chosen": -306.778076171875, "logps/rejected": -216.4516143798828, "loss": 0.3737, "rewards/accuracies": 0.875, "rewards/chosen": 0.16507583856582642, "rewards/margins": 1.5624847412109375, "rewards/rejected": -1.3974090814590454, "step": 4022 }, { "epoch": 0.46, "learning_rate": 1.633267002224043e-07, "logits/chosen": -2.799703359603882, "logits/rejected": -2.9963207244873047, "logps/chosen": -119.42378234863281, "logps/rejected": -195.06724548339844, "loss": 0.3615, "rewards/accuracies": 0.875, "rewards/chosen": 0.13284647464752197, "rewards/margins": 1.8496222496032715, "rewards/rejected": -1.7167757749557495, "step": 4023 }, { "epoch": 0.46, "learning_rate": 1.6329158375278004e-07, "logits/chosen": -3.6333224773406982, "logits/rejected": -3.2635669708251953, "logps/chosen": -206.45010375976562, "logps/rejected": -101.29391479492188, "loss": 0.5436, "rewards/accuracies": 0.75, "rewards/chosen": 0.3644915521144867, "rewards/margins": 0.7492147088050842, "rewards/rejected": -0.38472312688827515, "step": 4024 }, { "epoch": 0.46, "learning_rate": 1.632564672831558e-07, "logits/chosen": -3.3943490982055664, "logits/rejected": -3.5797531604766846, "logps/chosen": -266.91485595703125, "logps/rejected": -222.17868041992188, "loss": 0.8014, "rewards/accuracies": 0.75, "rewards/chosen": -0.3407958447933197, "rewards/margins": 1.2777647972106934, "rewards/rejected": -1.618560552597046, "step": 4025 }, { "epoch": 0.46, "learning_rate": 1.6322135081353153e-07, "logits/chosen": -3.2120461463928223, "logits/rejected": -3.018148183822632, "logps/chosen": -142.23968505859375, "logps/rejected": -211.62646484375, "loss": 0.2178, "rewards/accuracies": 0.875, "rewards/chosen": -0.2758142352104187, "rewards/margins": 2.663294792175293, "rewards/rejected": -2.9391088485717773, "step": 4026 }, { "epoch": 0.46, "learning_rate": 1.6318623434390728e-07, "logits/chosen": -3.059521198272705, "logits/rejected": -3.024779796600342, "logps/chosen": -204.6798858642578, "logps/rejected": -301.9896240234375, "loss": 0.2352, "rewards/accuracies": 1.0, "rewards/chosen": 0.11559763550758362, "rewards/margins": 1.718866229057312, "rewards/rejected": -1.6032685041427612, "step": 4027 }, { "epoch": 0.46, "learning_rate": 1.63151117874283e-07, "logits/chosen": -3.172276735305786, "logits/rejected": -3.2330238819122314, "logps/chosen": -175.01214599609375, "logps/rejected": -183.70687866210938, "loss": 0.4463, "rewards/accuracies": 0.875, "rewards/chosen": 0.1732264757156372, "rewards/margins": 1.4366583824157715, "rewards/rejected": -1.2634317874908447, "step": 4028 }, { "epoch": 0.46, "learning_rate": 1.631160014046588e-07, "logits/chosen": -2.864838123321533, "logits/rejected": -2.4638473987579346, "logps/chosen": -286.1737976074219, "logps/rejected": -310.2687072753906, "loss": 0.2655, "rewards/accuracies": 1.0, "rewards/chosen": 0.40263769030570984, "rewards/margins": 1.4971652030944824, "rewards/rejected": -1.0945274829864502, "step": 4029 }, { "epoch": 0.46, "learning_rate": 1.6308088493503454e-07, "logits/chosen": -3.555224895477295, "logits/rejected": -3.609050750732422, "logps/chosen": -176.36825561523438, "logps/rejected": -229.11532592773438, "loss": 0.3972, "rewards/accuracies": 0.75, "rewards/chosen": -0.44689565896987915, "rewards/margins": 1.3451135158538818, "rewards/rejected": -1.7920091152191162, "step": 4030 }, { "epoch": 0.46, "learning_rate": 1.6304576846541027e-07, "logits/chosen": -2.8344674110412598, "logits/rejected": -2.485281229019165, "logps/chosen": -230.05386352539062, "logps/rejected": -336.6531982421875, "loss": 0.2868, "rewards/accuracies": 0.75, "rewards/chosen": -0.18868786096572876, "rewards/margins": 2.620177745819092, "rewards/rejected": -2.808865547180176, "step": 4031 }, { "epoch": 0.46, "learning_rate": 1.6301065199578602e-07, "logits/chosen": -3.367945671081543, "logits/rejected": -3.166989326477051, "logps/chosen": -299.2272033691406, "logps/rejected": -223.93243408203125, "loss": 0.4351, "rewards/accuracies": 0.875, "rewards/chosen": -0.8628818988800049, "rewards/margins": 1.200608491897583, "rewards/rejected": -2.063490390777588, "step": 4032 }, { "epoch": 0.46, "learning_rate": 1.6297553552616178e-07, "logits/chosen": -3.068775177001953, "logits/rejected": -2.9324536323547363, "logps/chosen": -277.0895080566406, "logps/rejected": -259.8199768066406, "loss": 0.3232, "rewards/accuracies": 0.75, "rewards/chosen": -0.01919812336564064, "rewards/margins": 1.8387786149978638, "rewards/rejected": -1.8579767942428589, "step": 4033 }, { "epoch": 0.47, "learning_rate": 1.629404190565375e-07, "logits/chosen": -2.5898752212524414, "logits/rejected": -2.6114344596862793, "logps/chosen": -318.0885925292969, "logps/rejected": -276.208984375, "loss": 0.6615, "rewards/accuracies": 0.625, "rewards/chosen": -0.0021690428256988525, "rewards/margins": 1.2351105213165283, "rewards/rejected": -1.2372796535491943, "step": 4034 }, { "epoch": 0.47, "learning_rate": 1.6290530258691326e-07, "logits/chosen": -3.412468910217285, "logits/rejected": -3.5625195503234863, "logps/chosen": -212.18800354003906, "logps/rejected": -237.26254272460938, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": -0.5222256183624268, "rewards/margins": 0.937780499458313, "rewards/rejected": -1.4600062370300293, "step": 4035 }, { "epoch": 0.47, "learning_rate": 1.6287018611728898e-07, "logits/chosen": -3.0926148891448975, "logits/rejected": -3.334456205368042, "logps/chosen": -137.22344970703125, "logps/rejected": -167.22621154785156, "loss": 0.3686, "rewards/accuracies": 0.75, "rewards/chosen": 0.319000780582428, "rewards/margins": 1.8409645557403564, "rewards/rejected": -1.5219638347625732, "step": 4036 }, { "epoch": 0.47, "learning_rate": 1.6283506964766474e-07, "logits/chosen": -3.4398720264434814, "logits/rejected": -3.0812950134277344, "logps/chosen": -289.9349060058594, "logps/rejected": -189.63583374023438, "loss": 0.2326, "rewards/accuracies": 1.0, "rewards/chosen": -0.06960625946521759, "rewards/margins": 1.6607328653335571, "rewards/rejected": -1.7303392887115479, "step": 4037 }, { "epoch": 0.47, "learning_rate": 1.627999531780405e-07, "logits/chosen": -3.8933680057525635, "logits/rejected": -3.85137939453125, "logps/chosen": -408.55499267578125, "logps/rejected": -299.9187316894531, "loss": 0.513, "rewards/accuracies": 0.625, "rewards/chosen": -0.25546589493751526, "rewards/margins": 1.386702537536621, "rewards/rejected": -1.642168402671814, "step": 4038 }, { "epoch": 0.47, "learning_rate": 1.6276483670841622e-07, "logits/chosen": -3.684659481048584, "logits/rejected": -3.539393901824951, "logps/chosen": -248.04925537109375, "logps/rejected": -232.35426330566406, "loss": 0.3165, "rewards/accuracies": 0.875, "rewards/chosen": 0.06389844417572021, "rewards/margins": 1.899062991142273, "rewards/rejected": -1.8351644277572632, "step": 4039 }, { "epoch": 0.47, "learning_rate": 1.62729720238792e-07, "logits/chosen": -3.230335235595703, "logits/rejected": -3.340547561645508, "logps/chosen": -341.18359375, "logps/rejected": -225.42108154296875, "loss": 0.7633, "rewards/accuracies": 0.75, "rewards/chosen": -0.690831184387207, "rewards/margins": 1.0312319993972778, "rewards/rejected": -1.7220631837844849, "step": 4040 }, { "epoch": 0.47, "learning_rate": 1.6269460376916775e-07, "logits/chosen": -2.921609401702881, "logits/rejected": -2.8276190757751465, "logps/chosen": -269.6799621582031, "logps/rejected": -253.01113891601562, "loss": 0.3711, "rewards/accuracies": 1.0, "rewards/chosen": -0.3569781184196472, "rewards/margins": 0.9029462933540344, "rewards/rejected": -1.2599244117736816, "step": 4041 }, { "epoch": 0.47, "learning_rate": 1.6265948729954348e-07, "logits/chosen": -2.478303909301758, "logits/rejected": -2.3725249767303467, "logps/chosen": -299.666748046875, "logps/rejected": -212.21331787109375, "loss": 0.2081, "rewards/accuracies": 1.0, "rewards/chosen": -0.29345524311065674, "rewards/margins": 2.0934982299804688, "rewards/rejected": -2.386953353881836, "step": 4042 }, { "epoch": 0.47, "learning_rate": 1.6262437082991924e-07, "logits/chosen": -3.449610710144043, "logits/rejected": -3.4573569297790527, "logps/chosen": -383.6758728027344, "logps/rejected": -286.091064453125, "loss": 0.5381, "rewards/accuracies": 0.5, "rewards/chosen": -0.5726088285446167, "rewards/margins": 1.9646403789520264, "rewards/rejected": -2.5372490882873535, "step": 4043 }, { "epoch": 0.47, "learning_rate": 1.6258925436029496e-07, "logits/chosen": -3.3067967891693115, "logits/rejected": -3.060866594314575, "logps/chosen": -274.7522277832031, "logps/rejected": -220.51214599609375, "loss": 0.5451, "rewards/accuracies": 0.75, "rewards/chosen": -0.07868346571922302, "rewards/margins": 0.9113374352455139, "rewards/rejected": -0.9900208711624146, "step": 4044 }, { "epoch": 0.47, "learning_rate": 1.6255413789067072e-07, "logits/chosen": -2.8935694694519043, "logits/rejected": -2.994480609893799, "logps/chosen": -194.4086456298828, "logps/rejected": -315.971923828125, "loss": 0.4751, "rewards/accuracies": 0.75, "rewards/chosen": -0.5314410924911499, "rewards/margins": 1.877898097038269, "rewards/rejected": -2.409339189529419, "step": 4045 }, { "epoch": 0.47, "learning_rate": 1.6251902142104647e-07, "logits/chosen": -3.7800559997558594, "logits/rejected": -3.804351806640625, "logps/chosen": -106.2520980834961, "logps/rejected": -224.29290771484375, "loss": 0.323, "rewards/accuracies": 0.875, "rewards/chosen": -0.02741996943950653, "rewards/margins": 1.7521636486053467, "rewards/rejected": -1.7795836925506592, "step": 4046 }, { "epoch": 0.47, "learning_rate": 1.624839049514222e-07, "logits/chosen": -3.0124661922454834, "logits/rejected": -3.0923705101013184, "logps/chosen": -332.7197570800781, "logps/rejected": -330.11163330078125, "loss": 0.2786, "rewards/accuracies": 0.75, "rewards/chosen": -0.49955371022224426, "rewards/margins": 3.0667972564697266, "rewards/rejected": -3.5663509368896484, "step": 4047 }, { "epoch": 0.47, "learning_rate": 1.6244878848179795e-07, "logits/chosen": -3.09550142288208, "logits/rejected": -3.2110066413879395, "logps/chosen": -331.6051330566406, "logps/rejected": -290.2023620605469, "loss": 0.2116, "rewards/accuracies": 0.875, "rewards/chosen": 0.03305875509977341, "rewards/margins": 1.997115135192871, "rewards/rejected": -1.9640564918518066, "step": 4048 }, { "epoch": 0.47, "learning_rate": 1.6241367201217368e-07, "logits/chosen": -3.4432365894317627, "logits/rejected": -3.538078784942627, "logps/chosen": -357.5598449707031, "logps/rejected": -222.01214599609375, "loss": 0.2483, "rewards/accuracies": 0.875, "rewards/chosen": -0.3937512934207916, "rewards/margins": 2.263476610183716, "rewards/rejected": -2.6572279930114746, "step": 4049 }, { "epoch": 0.47, "learning_rate": 1.6237855554254943e-07, "logits/chosen": -2.975454092025757, "logits/rejected": -2.8118350505828857, "logps/chosen": -200.75128173828125, "logps/rejected": -281.24603271484375, "loss": 0.4147, "rewards/accuracies": 0.875, "rewards/chosen": 0.2514498233795166, "rewards/margins": 1.8065667152404785, "rewards/rejected": -1.555116891860962, "step": 4050 }, { "epoch": 0.47, "learning_rate": 1.623434390729252e-07, "logits/chosen": -3.272108793258667, "logits/rejected": -2.8883962631225586, "logps/chosen": -265.5798645019531, "logps/rejected": -276.1944274902344, "loss": 0.6888, "rewards/accuracies": 0.75, "rewards/chosen": -0.42662376165390015, "rewards/margins": 0.7067745327949524, "rewards/rejected": -1.1333982944488525, "step": 4051 }, { "epoch": 0.47, "learning_rate": 1.6230832260330094e-07, "logits/chosen": -2.88247013092041, "logits/rejected": -2.8326401710510254, "logps/chosen": -166.92416381835938, "logps/rejected": -201.14125061035156, "loss": 0.974, "rewards/accuracies": 0.5, "rewards/chosen": -0.9785467982292175, "rewards/margins": -0.33031877875328064, "rewards/rejected": -0.6482280492782593, "step": 4052 }, { "epoch": 0.47, "learning_rate": 1.622732061336767e-07, "logits/chosen": -2.825571298599243, "logits/rejected": -2.823254108428955, "logps/chosen": -426.6119079589844, "logps/rejected": -314.38775634765625, "loss": 0.4751, "rewards/accuracies": 0.75, "rewards/chosen": -0.4948697090148926, "rewards/margins": 1.0519132614135742, "rewards/rejected": -1.5467829704284668, "step": 4053 }, { "epoch": 0.47, "learning_rate": 1.6223808966405245e-07, "logits/chosen": -2.5725345611572266, "logits/rejected": -2.6306893825531006, "logps/chosen": -389.2899169921875, "logps/rejected": -286.13812255859375, "loss": 0.3991, "rewards/accuracies": 1.0, "rewards/chosen": -0.11526241898536682, "rewards/margins": 0.9045605659484863, "rewards/rejected": -1.0198229551315308, "step": 4054 }, { "epoch": 0.47, "learning_rate": 1.6220297319442818e-07, "logits/chosen": -3.0268611907958984, "logits/rejected": -3.354990005493164, "logps/chosen": -323.03240966796875, "logps/rejected": -193.38967895507812, "loss": 0.4458, "rewards/accuracies": 0.875, "rewards/chosen": 0.4127781391143799, "rewards/margins": 2.2681076526641846, "rewards/rejected": -1.8553295135498047, "step": 4055 }, { "epoch": 0.47, "learning_rate": 1.6216785672480393e-07, "logits/chosen": -2.512786388397217, "logits/rejected": -2.458662509918213, "logps/chosen": -209.0631103515625, "logps/rejected": -304.8893127441406, "loss": 0.2517, "rewards/accuracies": 0.875, "rewards/chosen": -0.3590794503688812, "rewards/margins": 2.5722949504852295, "rewards/rejected": -2.9313745498657227, "step": 4056 }, { "epoch": 0.47, "learning_rate": 1.6213274025517966e-07, "logits/chosen": -2.889737844467163, "logits/rejected": -2.872379779815674, "logps/chosen": -337.4125061035156, "logps/rejected": -279.84716796875, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": -0.0928000956773758, "rewards/margins": 1.4457640647888184, "rewards/rejected": -1.5385642051696777, "step": 4057 }, { "epoch": 0.47, "learning_rate": 1.620976237855554e-07, "logits/chosen": -2.735053062438965, "logits/rejected": -2.6983072757720947, "logps/chosen": -101.15076446533203, "logps/rejected": -160.7322540283203, "loss": 0.6409, "rewards/accuracies": 0.5, "rewards/chosen": -0.04400122910737991, "rewards/margins": 1.1571223735809326, "rewards/rejected": -1.201123595237732, "step": 4058 }, { "epoch": 0.47, "learning_rate": 1.6206250731593116e-07, "logits/chosen": -3.008755683898926, "logits/rejected": -2.788393497467041, "logps/chosen": -350.5355224609375, "logps/rejected": -200.05691528320312, "loss": 0.4711, "rewards/accuracies": 0.75, "rewards/chosen": -0.3124668598175049, "rewards/margins": 1.648129940032959, "rewards/rejected": -1.9605967998504639, "step": 4059 }, { "epoch": 0.47, "learning_rate": 1.620273908463069e-07, "logits/chosen": -2.4946846961975098, "logits/rejected": -2.316445827484131, "logps/chosen": -380.6108703613281, "logps/rejected": -169.24020385742188, "loss": 0.3064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1611902266740799, "rewards/margins": 1.4213545322418213, "rewards/rejected": -1.5825448036193848, "step": 4060 }, { "epoch": 0.47, "learning_rate": 1.6199227437668265e-07, "logits/chosen": -3.5908048152923584, "logits/rejected": -3.109975576400757, "logps/chosen": -227.29116821289062, "logps/rejected": -201.41839599609375, "loss": 0.3521, "rewards/accuracies": 0.875, "rewards/chosen": -0.17357294261455536, "rewards/margins": 1.1250276565551758, "rewards/rejected": -1.2986005544662476, "step": 4061 }, { "epoch": 0.47, "learning_rate": 1.6195715790705843e-07, "logits/chosen": -3.623020648956299, "logits/rejected": -3.668264627456665, "logps/chosen": -299.7003173828125, "logps/rejected": -366.5696716308594, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": 0.737684428691864, "rewards/margins": 3.5832314491271973, "rewards/rejected": -2.8455467224121094, "step": 4062 }, { "epoch": 0.47, "learning_rate": 1.6192204143743415e-07, "logits/chosen": -3.3868188858032227, "logits/rejected": -3.1987342834472656, "logps/chosen": -241.6887969970703, "logps/rejected": -179.24917602539062, "loss": 0.2625, "rewards/accuracies": 0.75, "rewards/chosen": -0.029952004551887512, "rewards/margins": 2.0369181632995605, "rewards/rejected": -2.0668702125549316, "step": 4063 }, { "epoch": 0.47, "learning_rate": 1.618869249678099e-07, "logits/chosen": -2.64900541305542, "logits/rejected": -2.7251334190368652, "logps/chosen": -203.93685913085938, "logps/rejected": -172.60650634765625, "loss": 0.485, "rewards/accuracies": 0.625, "rewards/chosen": 0.13756267726421356, "rewards/margins": 1.1108112335205078, "rewards/rejected": -0.9732487201690674, "step": 4064 }, { "epoch": 0.47, "learning_rate": 1.6185180849818563e-07, "logits/chosen": -3.597430467605591, "logits/rejected": -3.70204496383667, "logps/chosen": -288.0597229003906, "logps/rejected": -352.9414367675781, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": 0.3724621534347534, "rewards/margins": 4.548531532287598, "rewards/rejected": -4.176069736480713, "step": 4065 }, { "epoch": 0.47, "learning_rate": 1.618166920285614e-07, "logits/chosen": -3.8972740173339844, "logits/rejected": -3.8633410930633545, "logps/chosen": -284.5651550292969, "logps/rejected": -345.77520751953125, "loss": 0.2906, "rewards/accuracies": 0.875, "rewards/chosen": 0.5070737600326538, "rewards/margins": 2.334310531616211, "rewards/rejected": -1.8272366523742676, "step": 4066 }, { "epoch": 0.47, "learning_rate": 1.6178157555893714e-07, "logits/chosen": -2.9364469051361084, "logits/rejected": -2.991001605987549, "logps/chosen": -283.2635803222656, "logps/rejected": -233.94052124023438, "loss": 0.2644, "rewards/accuracies": 0.875, "rewards/chosen": 0.49186983704566956, "rewards/margins": 2.050222396850586, "rewards/rejected": -1.5583525896072388, "step": 4067 }, { "epoch": 0.47, "learning_rate": 1.6174645908931287e-07, "logits/chosen": -3.2506465911865234, "logits/rejected": -3.0985665321350098, "logps/chosen": -322.5287170410156, "logps/rejected": -293.9366149902344, "loss": 0.3962, "rewards/accuracies": 0.75, "rewards/chosen": -0.3096959888935089, "rewards/margins": 1.9976541996002197, "rewards/rejected": -2.3073501586914062, "step": 4068 }, { "epoch": 0.47, "learning_rate": 1.6171134261968862e-07, "logits/chosen": -3.557079315185547, "logits/rejected": -3.2780210971832275, "logps/chosen": -335.9757080078125, "logps/rejected": -220.77828979492188, "loss": 0.3001, "rewards/accuracies": 0.75, "rewards/chosen": -0.05314311757683754, "rewards/margins": 3.1939144134521484, "rewards/rejected": -3.2470574378967285, "step": 4069 }, { "epoch": 0.47, "learning_rate": 1.6167622615006438e-07, "logits/chosen": -2.7959022521972656, "logits/rejected": -2.9880192279815674, "logps/chosen": -289.2069091796875, "logps/rejected": -202.97283935546875, "loss": 0.2452, "rewards/accuracies": 0.875, "rewards/chosen": 0.018633365631103516, "rewards/margins": 1.8230901956558228, "rewards/rejected": -1.8044568300247192, "step": 4070 }, { "epoch": 0.47, "learning_rate": 1.616411096804401e-07, "logits/chosen": -2.9110167026519775, "logits/rejected": -2.675739288330078, "logps/chosen": -277.39910888671875, "logps/rejected": -203.64476013183594, "loss": 0.7284, "rewards/accuracies": 0.5, "rewards/chosen": -0.16778340935707092, "rewards/margins": 1.4668214321136475, "rewards/rejected": -1.6346049308776855, "step": 4071 }, { "epoch": 0.47, "learning_rate": 1.6160599321081586e-07, "logits/chosen": -3.600123882293701, "logits/rejected": -3.3775577545166016, "logps/chosen": -251.54457092285156, "logps/rejected": -176.36343383789062, "loss": 0.1712, "rewards/accuracies": 1.0, "rewards/chosen": 0.3572041690349579, "rewards/margins": 2.502413034439087, "rewards/rejected": -2.1452085971832275, "step": 4072 }, { "epoch": 0.47, "learning_rate": 1.6157087674119159e-07, "logits/chosen": -2.7266476154327393, "logits/rejected": -2.6865971088409424, "logps/chosen": -108.95465087890625, "logps/rejected": -291.3143005371094, "loss": 0.4454, "rewards/accuracies": 0.75, "rewards/chosen": 0.0013402849435806274, "rewards/margins": 2.469937801361084, "rewards/rejected": -2.468597412109375, "step": 4073 }, { "epoch": 0.47, "learning_rate": 1.6153576027156737e-07, "logits/chosen": -3.0622568130493164, "logits/rejected": -3.217190742492676, "logps/chosen": -185.26524353027344, "logps/rejected": -263.9554138183594, "loss": 0.3446, "rewards/accuracies": 0.875, "rewards/chosen": -0.061963994055986404, "rewards/margins": 3.2054402828216553, "rewards/rejected": -3.267404079437256, "step": 4074 }, { "epoch": 0.47, "learning_rate": 1.6150064380194312e-07, "logits/chosen": -2.65216064453125, "logits/rejected": -2.2063989639282227, "logps/chosen": -252.00613403320312, "logps/rejected": -191.04595947265625, "loss": 0.3888, "rewards/accuracies": 0.875, "rewards/chosen": 0.3214179575443268, "rewards/margins": 1.7928763628005981, "rewards/rejected": -1.4714584350585938, "step": 4075 }, { "epoch": 0.47, "learning_rate": 1.6146552733231885e-07, "logits/chosen": -2.9533815383911133, "logits/rejected": -2.2752525806427, "logps/chosen": -442.67108154296875, "logps/rejected": -295.9713134765625, "loss": 0.3278, "rewards/accuracies": 0.875, "rewards/chosen": 0.13567256927490234, "rewards/margins": 1.4763846397399902, "rewards/rejected": -1.3407119512557983, "step": 4076 }, { "epoch": 0.47, "learning_rate": 1.614304108626946e-07, "logits/chosen": -3.2652177810668945, "logits/rejected": -2.9810709953308105, "logps/chosen": -226.9775848388672, "logps/rejected": -292.7625732421875, "loss": 0.175, "rewards/accuracies": 1.0, "rewards/chosen": 0.17412106692790985, "rewards/margins": 2.4508068561553955, "rewards/rejected": -2.2766857147216797, "step": 4077 }, { "epoch": 0.47, "learning_rate": 1.6139529439307036e-07, "logits/chosen": -3.5296740531921387, "logits/rejected": -3.809813976287842, "logps/chosen": -363.9915771484375, "logps/rejected": -237.93887329101562, "loss": 0.464, "rewards/accuracies": 0.875, "rewards/chosen": -0.13368822634220123, "rewards/margins": 1.0918710231781006, "rewards/rejected": -1.2255592346191406, "step": 4078 }, { "epoch": 0.47, "learning_rate": 1.6136017792344608e-07, "logits/chosen": -3.5791029930114746, "logits/rejected": -3.2449162006378174, "logps/chosen": -372.3717956542969, "logps/rejected": -319.799560546875, "loss": 0.2589, "rewards/accuracies": 1.0, "rewards/chosen": 0.0651768147945404, "rewards/margins": 2.55277156829834, "rewards/rejected": -2.4875946044921875, "step": 4079 }, { "epoch": 0.47, "learning_rate": 1.6132506145382184e-07, "logits/chosen": -3.2799036502838135, "logits/rejected": -3.2424380779266357, "logps/chosen": -478.509033203125, "logps/rejected": -283.6539306640625, "loss": 0.5316, "rewards/accuracies": 0.75, "rewards/chosen": -0.2687143385410309, "rewards/margins": 0.6766458749771118, "rewards/rejected": -0.9453601837158203, "step": 4080 }, { "epoch": 0.47, "learning_rate": 1.6128994498419756e-07, "logits/chosen": -3.247452735900879, "logits/rejected": -3.2633485794067383, "logps/chosen": -266.0584716796875, "logps/rejected": -273.18243408203125, "loss": 0.188, "rewards/accuracies": 1.0, "rewards/chosen": 0.3341996669769287, "rewards/margins": 2.7335519790649414, "rewards/rejected": -2.3993520736694336, "step": 4081 }, { "epoch": 0.47, "learning_rate": 1.6125482851457332e-07, "logits/chosen": -2.675589084625244, "logits/rejected": -2.3968091011047363, "logps/chosen": -280.9840393066406, "logps/rejected": -211.6583709716797, "loss": 0.3901, "rewards/accuracies": 0.75, "rewards/chosen": -0.4033336043357849, "rewards/margins": 1.4458403587341309, "rewards/rejected": -1.8491740226745605, "step": 4082 }, { "epoch": 0.47, "learning_rate": 1.612197120449491e-07, "logits/chosen": -3.692573070526123, "logits/rejected": -3.7004177570343018, "logps/chosen": -169.731689453125, "logps/rejected": -210.74913024902344, "loss": 0.4997, "rewards/accuracies": 0.75, "rewards/chosen": -0.5497320294380188, "rewards/margins": 2.31565260887146, "rewards/rejected": -2.865384578704834, "step": 4083 }, { "epoch": 0.47, "learning_rate": 1.611845955753248e-07, "logits/chosen": -2.482783317565918, "logits/rejected": -2.4182682037353516, "logps/chosen": -416.8567810058594, "logps/rejected": -390.0892333984375, "loss": 0.3689, "rewards/accuracies": 0.625, "rewards/chosen": -0.4432322382926941, "rewards/margins": 3.286282539367676, "rewards/rejected": -3.7295150756835938, "step": 4084 }, { "epoch": 0.47, "learning_rate": 1.6114947910570058e-07, "logits/chosen": -2.671804904937744, "logits/rejected": -2.845076560974121, "logps/chosen": -445.1983947753906, "logps/rejected": -451.55914306640625, "loss": 0.3453, "rewards/accuracies": 0.875, "rewards/chosen": 0.11086338758468628, "rewards/margins": 2.0655102729797363, "rewards/rejected": -1.9546468257904053, "step": 4085 }, { "epoch": 0.47, "learning_rate": 1.6111436263607633e-07, "logits/chosen": -3.120803117752075, "logits/rejected": -3.0286331176757812, "logps/chosen": -210.17247009277344, "logps/rejected": -160.3361358642578, "loss": 0.2603, "rewards/accuracies": 0.875, "rewards/chosen": 0.1874322146177292, "rewards/margins": 2.1447460651397705, "rewards/rejected": -1.9573140144348145, "step": 4086 }, { "epoch": 0.47, "learning_rate": 1.6107924616645206e-07, "logits/chosen": -3.103238582611084, "logits/rejected": -2.979245662689209, "logps/chosen": -228.33238220214844, "logps/rejected": -174.96453857421875, "loss": 0.2908, "rewards/accuracies": 1.0, "rewards/chosen": -0.3574143946170807, "rewards/margins": 1.6387567520141602, "rewards/rejected": -1.9961711168289185, "step": 4087 }, { "epoch": 0.47, "learning_rate": 1.6104412969682781e-07, "logits/chosen": -2.671816825866699, "logits/rejected": -2.587730884552002, "logps/chosen": -402.6905517578125, "logps/rejected": -281.73260498046875, "loss": 0.2517, "rewards/accuracies": 1.0, "rewards/chosen": -0.22346174716949463, "rewards/margins": 1.6898080110549927, "rewards/rejected": -1.9132696390151978, "step": 4088 }, { "epoch": 0.47, "learning_rate": 1.6100901322720354e-07, "logits/chosen": -4.099849224090576, "logits/rejected": -3.711578845977783, "logps/chosen": -255.94647216796875, "logps/rejected": -243.58377075195312, "loss": 0.2448, "rewards/accuracies": 0.875, "rewards/chosen": -0.08249664306640625, "rewards/margins": 2.994204044342041, "rewards/rejected": -3.0767006874084473, "step": 4089 }, { "epoch": 0.47, "learning_rate": 1.609738967575793e-07, "logits/chosen": -2.45304799079895, "logits/rejected": -2.3688225746154785, "logps/chosen": -416.39617919921875, "logps/rejected": -447.3258972167969, "loss": 0.4749, "rewards/accuracies": 0.75, "rewards/chosen": -0.28735458850860596, "rewards/margins": 1.0852411985397339, "rewards/rejected": -1.3725957870483398, "step": 4090 }, { "epoch": 0.47, "learning_rate": 1.6093878028795505e-07, "logits/chosen": -3.4454197883605957, "logits/rejected": -3.4913885593414307, "logps/chosen": -263.5713806152344, "logps/rejected": -252.53358459472656, "loss": 0.1609, "rewards/accuracies": 1.0, "rewards/chosen": 0.10522471368312836, "rewards/margins": 2.573647975921631, "rewards/rejected": -2.4684231281280518, "step": 4091 }, { "epoch": 0.47, "learning_rate": 1.6090366381833078e-07, "logits/chosen": -3.3793463706970215, "logits/rejected": -3.083359956741333, "logps/chosen": -244.01907348632812, "logps/rejected": -196.00689697265625, "loss": 0.5957, "rewards/accuracies": 0.75, "rewards/chosen": -0.3730229437351227, "rewards/margins": 0.6069237589836121, "rewards/rejected": -0.9799466729164124, "step": 4092 }, { "epoch": 0.47, "learning_rate": 1.6086854734870653e-07, "logits/chosen": -3.5456299781799316, "logits/rejected": -3.5872933864593506, "logps/chosen": -249.104248046875, "logps/rejected": -154.18389892578125, "loss": 0.3177, "rewards/accuracies": 0.75, "rewards/chosen": 0.22897014021873474, "rewards/margins": 1.8333407640457153, "rewards/rejected": -1.6043705940246582, "step": 4093 }, { "epoch": 0.47, "learning_rate": 1.6083343087908226e-07, "logits/chosen": -2.6931679248809814, "logits/rejected": -2.9569320678710938, "logps/chosen": -281.9060363769531, "logps/rejected": -203.78485107421875, "loss": 0.3851, "rewards/accuracies": 0.75, "rewards/chosen": 0.03623540699481964, "rewards/margins": 1.7937355041503906, "rewards/rejected": -1.7575000524520874, "step": 4094 }, { "epoch": 0.47, "learning_rate": 1.60798314409458e-07, "logits/chosen": -3.352503776550293, "logits/rejected": -3.1573548316955566, "logps/chosen": -264.0829772949219, "logps/rejected": -190.2379150390625, "loss": 0.6422, "rewards/accuracies": 0.75, "rewards/chosen": 0.30534741282463074, "rewards/margins": 0.5134832859039307, "rewards/rejected": -0.20813587307929993, "step": 4095 }, { "epoch": 0.47, "learning_rate": 1.607631979398338e-07, "logits/chosen": -3.5181140899658203, "logits/rejected": -3.5778889656066895, "logps/chosen": -214.92332458496094, "logps/rejected": -171.6587677001953, "loss": 0.6932, "rewards/accuracies": 0.75, "rewards/chosen": -0.4388342499732971, "rewards/margins": 0.4311586916446686, "rewards/rejected": -0.8699929714202881, "step": 4096 }, { "epoch": 0.47, "learning_rate": 1.6072808147020952e-07, "logits/chosen": -3.6982994079589844, "logits/rejected": -3.240255832672119, "logps/chosen": -334.9539794921875, "logps/rejected": -226.12261962890625, "loss": 0.2375, "rewards/accuracies": 0.875, "rewards/chosen": 0.05106744170188904, "rewards/margins": 2.8452961444854736, "rewards/rejected": -2.7942285537719727, "step": 4097 }, { "epoch": 0.47, "learning_rate": 1.6069296500058527e-07, "logits/chosen": -3.297924757003784, "logits/rejected": -3.1629793643951416, "logps/chosen": -153.66806030273438, "logps/rejected": -145.23361206054688, "loss": 0.3355, "rewards/accuracies": 1.0, "rewards/chosen": -0.45878908038139343, "rewards/margins": 1.1152031421661377, "rewards/rejected": -1.5739922523498535, "step": 4098 }, { "epoch": 0.47, "learning_rate": 1.6065784853096103e-07, "logits/chosen": -2.524756908416748, "logits/rejected": -2.5333447456359863, "logps/chosen": -210.82962036132812, "logps/rejected": -307.5135803222656, "loss": 0.2457, "rewards/accuracies": 1.0, "rewards/chosen": -0.24274808168411255, "rewards/margins": 3.9773449897766113, "rewards/rejected": -4.220093250274658, "step": 4099 }, { "epoch": 0.47, "learning_rate": 1.6062273206133675e-07, "logits/chosen": -2.4171037673950195, "logits/rejected": -2.5284361839294434, "logps/chosen": -149.39317321777344, "logps/rejected": -236.36993408203125, "loss": 0.2511, "rewards/accuracies": 0.875, "rewards/chosen": 0.008223028853535652, "rewards/margins": 2.354922294616699, "rewards/rejected": -2.3466992378234863, "step": 4100 }, { "epoch": 0.47, "learning_rate": 1.605876155917125e-07, "logits/chosen": -3.033402919769287, "logits/rejected": -2.9349896907806396, "logps/chosen": -256.25823974609375, "logps/rejected": -318.106689453125, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": 0.6802597045898438, "rewards/margins": 2.5853726863861084, "rewards/rejected": -1.9051129817962646, "step": 4101 }, { "epoch": 0.47, "learning_rate": 1.6055249912208824e-07, "logits/chosen": -3.7346994876861572, "logits/rejected": -3.3292951583862305, "logps/chosen": -382.4684753417969, "logps/rejected": -221.89723205566406, "loss": 0.3642, "rewards/accuracies": 0.875, "rewards/chosen": -0.239411398768425, "rewards/margins": 1.4270793199539185, "rewards/rejected": -1.6664905548095703, "step": 4102 }, { "epoch": 0.47, "learning_rate": 1.60517382652464e-07, "logits/chosen": -3.4008290767669678, "logits/rejected": -3.7134299278259277, "logps/chosen": -116.62904357910156, "logps/rejected": -173.69943237304688, "loss": 0.4707, "rewards/accuracies": 0.875, "rewards/chosen": 0.005463123321533203, "rewards/margins": 2.4211981296539307, "rewards/rejected": -2.4157350063323975, "step": 4103 }, { "epoch": 0.47, "learning_rate": 1.6048226618283974e-07, "logits/chosen": -2.603105306625366, "logits/rejected": -2.3759660720825195, "logps/chosen": -182.88177490234375, "logps/rejected": -273.3739013671875, "loss": 0.187, "rewards/accuracies": 1.0, "rewards/chosen": -0.014993328601121902, "rewards/margins": 3.114842414855957, "rewards/rejected": -3.129835605621338, "step": 4104 }, { "epoch": 0.47, "learning_rate": 1.6044714971321547e-07, "logits/chosen": -2.8716862201690674, "logits/rejected": -2.983189582824707, "logps/chosen": -280.8775634765625, "logps/rejected": -383.16815185546875, "loss": 0.7401, "rewards/accuracies": 0.5, "rewards/chosen": -0.5399225354194641, "rewards/margins": 1.2310549020767212, "rewards/rejected": -1.77097749710083, "step": 4105 }, { "epoch": 0.47, "learning_rate": 1.6041203324359123e-07, "logits/chosen": -3.60548734664917, "logits/rejected": -3.5654544830322266, "logps/chosen": -316.3787841796875, "logps/rejected": -498.14739990234375, "loss": 0.3206, "rewards/accuracies": 0.75, "rewards/chosen": -0.43731316924095154, "rewards/margins": 2.5786402225494385, "rewards/rejected": -3.015953540802002, "step": 4106 }, { "epoch": 0.47, "learning_rate": 1.60376916773967e-07, "logits/chosen": -3.241730213165283, "logits/rejected": -2.9741666316986084, "logps/chosen": -314.63922119140625, "logps/rejected": -257.5995178222656, "loss": 0.3159, "rewards/accuracies": 0.75, "rewards/chosen": -0.3174893260002136, "rewards/margins": 1.512473225593567, "rewards/rejected": -1.8299624919891357, "step": 4107 }, { "epoch": 0.47, "learning_rate": 1.6034180030434273e-07, "logits/chosen": -3.2382211685180664, "logits/rejected": -3.398634195327759, "logps/chosen": -372.91314697265625, "logps/rejected": -336.0079650878906, "loss": 0.3088, "rewards/accuracies": 0.875, "rewards/chosen": 0.08390974998474121, "rewards/margins": 2.9107468128204346, "rewards/rejected": -2.8268370628356934, "step": 4108 }, { "epoch": 0.47, "learning_rate": 1.6030668383471849e-07, "logits/chosen": -3.787719964981079, "logits/rejected": -3.631359577178955, "logps/chosen": -194.42880249023438, "logps/rejected": -205.24838256835938, "loss": 0.5481, "rewards/accuracies": 0.625, "rewards/chosen": -0.25613611936569214, "rewards/margins": 0.5395616888999939, "rewards/rejected": -0.795697808265686, "step": 4109 }, { "epoch": 0.47, "learning_rate": 1.6027156736509421e-07, "logits/chosen": -2.9726340770721436, "logits/rejected": -3.054720878601074, "logps/chosen": -134.62191772460938, "logps/rejected": -175.95867919921875, "loss": 0.3708, "rewards/accuracies": 0.875, "rewards/chosen": -0.16966161131858826, "rewards/margins": 1.9331179857254028, "rewards/rejected": -2.1027796268463135, "step": 4110 }, { "epoch": 0.47, "learning_rate": 1.6023645089546997e-07, "logits/chosen": -2.5126664638519287, "logits/rejected": -2.7312824726104736, "logps/chosen": -323.19830322265625, "logps/rejected": -249.21018981933594, "loss": 0.4371, "rewards/accuracies": 0.625, "rewards/chosen": -0.056439101696014404, "rewards/margins": 1.408325433731079, "rewards/rejected": -1.4647647142410278, "step": 4111 }, { "epoch": 0.47, "learning_rate": 1.6020133442584572e-07, "logits/chosen": -3.5030529499053955, "logits/rejected": -3.299456834793091, "logps/chosen": -305.9978942871094, "logps/rejected": -175.88290405273438, "loss": 0.5824, "rewards/accuracies": 0.75, "rewards/chosen": 0.02139461040496826, "rewards/margins": 1.480688452720642, "rewards/rejected": -1.4592939615249634, "step": 4112 }, { "epoch": 0.47, "learning_rate": 1.6016621795622145e-07, "logits/chosen": -3.2485289573669434, "logits/rejected": -3.3496713638305664, "logps/chosen": -208.0928955078125, "logps/rejected": -373.43157958984375, "loss": 0.3243, "rewards/accuracies": 0.875, "rewards/chosen": -0.041187748312950134, "rewards/margins": 1.6766462326049805, "rewards/rejected": -1.7178339958190918, "step": 4113 }, { "epoch": 0.47, "learning_rate": 1.601311014865972e-07, "logits/chosen": -2.4665327072143555, "logits/rejected": -2.4878625869750977, "logps/chosen": -285.83404541015625, "logps/rejected": -289.4356689453125, "loss": 0.6585, "rewards/accuracies": 0.625, "rewards/chosen": -0.498344361782074, "rewards/margins": 0.5160529017448425, "rewards/rejected": -1.014397144317627, "step": 4114 }, { "epoch": 0.47, "learning_rate": 1.6009598501697296e-07, "logits/chosen": -3.664306879043579, "logits/rejected": -3.5734822750091553, "logps/chosen": -291.3409729003906, "logps/rejected": -260.845458984375, "loss": 0.7286, "rewards/accuracies": 0.75, "rewards/chosen": -0.601740837097168, "rewards/margins": 1.6911418437957764, "rewards/rejected": -2.2928826808929443, "step": 4115 }, { "epoch": 0.47, "learning_rate": 1.6006086854734868e-07, "logits/chosen": -3.783968448638916, "logits/rejected": -3.341583490371704, "logps/chosen": -398.2119445800781, "logps/rejected": -251.64190673828125, "loss": 0.7566, "rewards/accuracies": 0.75, "rewards/chosen": -0.5985844135284424, "rewards/margins": 1.6096915006637573, "rewards/rejected": -2.20827579498291, "step": 4116 }, { "epoch": 0.47, "learning_rate": 1.6002575207772446e-07, "logits/chosen": -3.352285146713257, "logits/rejected": -3.6951515674591064, "logps/chosen": -281.4228515625, "logps/rejected": -219.48483276367188, "loss": 0.4767, "rewards/accuracies": 0.75, "rewards/chosen": -0.3193380534648895, "rewards/margins": 1.5731438398361206, "rewards/rejected": -1.8924819231033325, "step": 4117 }, { "epoch": 0.47, "learning_rate": 1.5999063560810017e-07, "logits/chosen": -3.0560989379882812, "logits/rejected": -2.8971729278564453, "logps/chosen": -240.57003784179688, "logps/rejected": -314.08380126953125, "loss": 0.5187, "rewards/accuracies": 0.75, "rewards/chosen": -0.1691247820854187, "rewards/margins": 1.8293938636779785, "rewards/rejected": -1.9985185861587524, "step": 4118 }, { "epoch": 0.47, "learning_rate": 1.5995551913847595e-07, "logits/chosen": -3.1037588119506836, "logits/rejected": -2.705632448196411, "logps/chosen": -402.21563720703125, "logps/rejected": -251.33465576171875, "loss": 0.3586, "rewards/accuracies": 0.875, "rewards/chosen": -0.07259905338287354, "rewards/margins": 1.6625372171401978, "rewards/rejected": -1.7351361513137817, "step": 4119 }, { "epoch": 0.47, "learning_rate": 1.599204026688517e-07, "logits/chosen": -2.960047483444214, "logits/rejected": -2.8574070930480957, "logps/chosen": -308.9866943359375, "logps/rejected": -355.22259521484375, "loss": 0.4589, "rewards/accuracies": 0.625, "rewards/chosen": 0.06520962715148926, "rewards/margins": 1.3070883750915527, "rewards/rejected": -1.2418787479400635, "step": 4120 }, { "epoch": 0.48, "learning_rate": 1.5988528619922743e-07, "logits/chosen": -3.3280909061431885, "logits/rejected": -2.9432976245880127, "logps/chosen": -602.5799560546875, "logps/rejected": -329.9744567871094, "loss": 0.1425, "rewards/accuracies": 1.0, "rewards/chosen": -0.3884144723415375, "rewards/margins": 2.5448899269104004, "rewards/rejected": -2.9333043098449707, "step": 4121 }, { "epoch": 0.48, "learning_rate": 1.5985016972960318e-07, "logits/chosen": -2.320772409439087, "logits/rejected": -2.4083003997802734, "logps/chosen": -193.960693359375, "logps/rejected": -225.5411834716797, "loss": 0.4921, "rewards/accuracies": 0.875, "rewards/chosen": -0.014483049511909485, "rewards/margins": 2.1187217235565186, "rewards/rejected": -2.133204936981201, "step": 4122 }, { "epoch": 0.48, "learning_rate": 1.5981505325997893e-07, "logits/chosen": -2.848829746246338, "logits/rejected": -2.967329978942871, "logps/chosen": -286.685546875, "logps/rejected": -406.13458251953125, "loss": 0.4769, "rewards/accuracies": 0.625, "rewards/chosen": -0.6589688062667847, "rewards/margins": 3.739100933074951, "rewards/rejected": -4.398069858551025, "step": 4123 }, { "epoch": 0.48, "learning_rate": 1.5977993679035466e-07, "logits/chosen": -2.493645668029785, "logits/rejected": -2.925797939300537, "logps/chosen": -167.31866455078125, "logps/rejected": -182.39801025390625, "loss": 0.3424, "rewards/accuracies": 0.75, "rewards/chosen": 0.24861693382263184, "rewards/margins": 2.0535411834716797, "rewards/rejected": -1.8049243688583374, "step": 4124 }, { "epoch": 0.48, "learning_rate": 1.5974482032073042e-07, "logits/chosen": -3.050299644470215, "logits/rejected": -2.539034843444824, "logps/chosen": -304.99285888671875, "logps/rejected": -212.04092407226562, "loss": 0.5065, "rewards/accuracies": 0.625, "rewards/chosen": 0.12635576725006104, "rewards/margins": 0.7090057134628296, "rewards/rejected": -0.5826498866081238, "step": 4125 }, { "epoch": 0.48, "learning_rate": 1.5970970385110614e-07, "logits/chosen": -2.83903431892395, "logits/rejected": -3.001051902770996, "logps/chosen": -286.70294189453125, "logps/rejected": -242.54562377929688, "loss": 0.3713, "rewards/accuracies": 0.75, "rewards/chosen": 0.5777262449264526, "rewards/margins": 1.608403205871582, "rewards/rejected": -1.0306769609451294, "step": 4126 }, { "epoch": 0.48, "learning_rate": 1.596745873814819e-07, "logits/chosen": -3.3446526527404785, "logits/rejected": -2.9687626361846924, "logps/chosen": -317.37518310546875, "logps/rejected": -303.0245361328125, "loss": 0.4259, "rewards/accuracies": 0.875, "rewards/chosen": 0.22034157812595367, "rewards/margins": 1.0035682916641235, "rewards/rejected": -0.783226728439331, "step": 4127 }, { "epoch": 0.48, "learning_rate": 1.5963947091185768e-07, "logits/chosen": -2.7587926387786865, "logits/rejected": -2.4652693271636963, "logps/chosen": -372.4881286621094, "logps/rejected": -443.6074523925781, "loss": 0.3648, "rewards/accuracies": 0.75, "rewards/chosen": 0.8866021633148193, "rewards/margins": 1.984591007232666, "rewards/rejected": -1.0979889631271362, "step": 4128 }, { "epoch": 0.48, "learning_rate": 1.5960435444223338e-07, "logits/chosen": -3.317525863647461, "logits/rejected": -3.098708391189575, "logps/chosen": -150.4154052734375, "logps/rejected": -237.29885864257812, "loss": 0.4032, "rewards/accuracies": 0.75, "rewards/chosen": -0.16487836837768555, "rewards/margins": 1.7032475471496582, "rewards/rejected": -1.8681259155273438, "step": 4129 }, { "epoch": 0.48, "learning_rate": 1.5956923797260916e-07, "logits/chosen": -3.9482340812683105, "logits/rejected": -3.6534006595611572, "logps/chosen": -188.116455078125, "logps/rejected": -210.06655883789062, "loss": 0.5729, "rewards/accuracies": 0.625, "rewards/chosen": -0.29493212699890137, "rewards/margins": 0.8616693019866943, "rewards/rejected": -1.1566014289855957, "step": 4130 }, { "epoch": 0.48, "learning_rate": 1.595341215029849e-07, "logits/chosen": -3.325497627258301, "logits/rejected": -3.370974063873291, "logps/chosen": -221.25601196289062, "logps/rejected": -348.0758361816406, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": -0.058750346302986145, "rewards/margins": 2.4744784832000732, "rewards/rejected": -2.533228874206543, "step": 4131 }, { "epoch": 0.48, "learning_rate": 1.5949900503336064e-07, "logits/chosen": -2.582530975341797, "logits/rejected": -2.632814884185791, "logps/chosen": -352.6424255371094, "logps/rejected": -379.5065002441406, "loss": 0.3629, "rewards/accuracies": 0.875, "rewards/chosen": 0.013519465923309326, "rewards/margins": 1.4324030876159668, "rewards/rejected": -1.4188838005065918, "step": 4132 }, { "epoch": 0.48, "learning_rate": 1.594638885637364e-07, "logits/chosen": -3.787912368774414, "logits/rejected": -3.5043094158172607, "logps/chosen": -302.6733703613281, "logps/rejected": -200.42913818359375, "loss": 0.2626, "rewards/accuracies": 0.875, "rewards/chosen": -0.058378592133522034, "rewards/margins": 1.7218562364578247, "rewards/rejected": -1.7802350521087646, "step": 4133 }, { "epoch": 0.48, "learning_rate": 1.5942877209411212e-07, "logits/chosen": -3.1063055992126465, "logits/rejected": -3.1047165393829346, "logps/chosen": -197.04356384277344, "logps/rejected": -265.9906311035156, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": 0.7949743270874023, "rewards/margins": 2.5004003047943115, "rewards/rejected": -1.7054260969161987, "step": 4134 }, { "epoch": 0.48, "learning_rate": 1.5939365562448788e-07, "logits/chosen": -3.0185294151306152, "logits/rejected": -2.831360101699829, "logps/chosen": -169.4357147216797, "logps/rejected": -115.54486083984375, "loss": 0.7575, "rewards/accuracies": 0.375, "rewards/chosen": -0.4770050346851349, "rewards/margins": -0.014374271035194397, "rewards/rejected": -0.4626307487487793, "step": 4135 }, { "epoch": 0.48, "learning_rate": 1.5935853915486363e-07, "logits/chosen": -3.0436177253723145, "logits/rejected": -2.4734368324279785, "logps/chosen": -361.73236083984375, "logps/rejected": -186.24891662597656, "loss": 0.4325, "rewards/accuracies": 0.625, "rewards/chosen": 0.3524041175842285, "rewards/margins": 1.2923951148986816, "rewards/rejected": -0.9399910569190979, "step": 4136 }, { "epoch": 0.48, "learning_rate": 1.5932342268523936e-07, "logits/chosen": -3.2745347023010254, "logits/rejected": -2.905576705932617, "logps/chosen": -260.3238830566406, "logps/rejected": -167.7847900390625, "loss": 0.8012, "rewards/accuracies": 0.875, "rewards/chosen": 0.13937358558177948, "rewards/margins": 0.9905533194541931, "rewards/rejected": -0.851179838180542, "step": 4137 }, { "epoch": 0.48, "learning_rate": 1.592883062156151e-07, "logits/chosen": -2.9699742794036865, "logits/rejected": -2.882106065750122, "logps/chosen": -359.4383544921875, "logps/rejected": -257.7856140136719, "loss": 0.5024, "rewards/accuracies": 0.625, "rewards/chosen": -0.3787563741207123, "rewards/margins": 0.6818729639053345, "rewards/rejected": -1.0606292486190796, "step": 4138 }, { "epoch": 0.48, "learning_rate": 1.592531897459909e-07, "logits/chosen": -2.567288875579834, "logits/rejected": -2.5036728382110596, "logps/chosen": -226.12098693847656, "logps/rejected": -226.9105224609375, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": -0.006817393004894257, "rewards/margins": 2.6141862869262695, "rewards/rejected": -2.6210036277770996, "step": 4139 }, { "epoch": 0.48, "learning_rate": 1.592180732763666e-07, "logits/chosen": -3.2374706268310547, "logits/rejected": -3.300962448120117, "logps/chosen": -130.572998046875, "logps/rejected": -254.7808837890625, "loss": 0.3892, "rewards/accuracies": 0.875, "rewards/chosen": -0.5896345376968384, "rewards/margins": 1.9184602499008179, "rewards/rejected": -2.5080947875976562, "step": 4140 }, { "epoch": 0.48, "learning_rate": 1.5918295680674237e-07, "logits/chosen": -3.3762576580047607, "logits/rejected": -3.0574512481689453, "logps/chosen": -347.9324645996094, "logps/rejected": -264.5272216796875, "loss": 0.5426, "rewards/accuracies": 0.625, "rewards/chosen": -0.21936669945716858, "rewards/margins": 1.2588984966278076, "rewards/rejected": -1.4782652854919434, "step": 4141 }, { "epoch": 0.48, "learning_rate": 1.591478403371181e-07, "logits/chosen": -2.7622249126434326, "logits/rejected": -3.0797410011291504, "logps/chosen": -132.38650512695312, "logps/rejected": -354.5783996582031, "loss": 0.1921, "rewards/accuracies": 1.0, "rewards/chosen": 0.18097130954265594, "rewards/margins": 2.9941561222076416, "rewards/rejected": -2.813184976577759, "step": 4142 }, { "epoch": 0.48, "learning_rate": 1.5911272386749385e-07, "logits/chosen": -3.6517276763916016, "logits/rejected": -3.1498935222625732, "logps/chosen": -277.0365295410156, "logps/rejected": -302.8590393066406, "loss": 0.4745, "rewards/accuracies": 0.625, "rewards/chosen": -0.1383552849292755, "rewards/margins": 1.5517141819000244, "rewards/rejected": -1.690069556236267, "step": 4143 }, { "epoch": 0.48, "learning_rate": 1.590776073978696e-07, "logits/chosen": -2.9874038696289062, "logits/rejected": -2.884500026702881, "logps/chosen": -249.96299743652344, "logps/rejected": -307.10614013671875, "loss": 0.2875, "rewards/accuracies": 1.0, "rewards/chosen": -0.23816834390163422, "rewards/margins": 1.740641474723816, "rewards/rejected": -1.9788098335266113, "step": 4144 }, { "epoch": 0.48, "learning_rate": 1.5904249092824533e-07, "logits/chosen": -2.375847339630127, "logits/rejected": -2.6630895137786865, "logps/chosen": -315.50909423828125, "logps/rejected": -195.85581970214844, "loss": 0.503, "rewards/accuracies": 0.625, "rewards/chosen": -0.13839535415172577, "rewards/margins": 0.7343431711196899, "rewards/rejected": -0.8727384805679321, "step": 4145 }, { "epoch": 0.48, "learning_rate": 1.590073744586211e-07, "logits/chosen": -3.063681125640869, "logits/rejected": -3.191713571548462, "logps/chosen": -280.7762451171875, "logps/rejected": -333.161865234375, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": 0.795738160610199, "rewards/margins": 4.155856132507324, "rewards/rejected": -3.3601179122924805, "step": 4146 }, { "epoch": 0.48, "learning_rate": 1.5897225798899682e-07, "logits/chosen": -3.421813726425171, "logits/rejected": -3.0845956802368164, "logps/chosen": -227.12229919433594, "logps/rejected": -194.84848022460938, "loss": 0.4495, "rewards/accuracies": 0.875, "rewards/chosen": 0.004799067974090576, "rewards/margins": 1.5598645210266113, "rewards/rejected": -1.5550655126571655, "step": 4147 }, { "epoch": 0.48, "learning_rate": 1.5893714151937257e-07, "logits/chosen": -2.4708926677703857, "logits/rejected": -2.5168709754943848, "logps/chosen": -157.74334716796875, "logps/rejected": -294.7514953613281, "loss": 0.4194, "rewards/accuracies": 0.625, "rewards/chosen": 0.2014731466770172, "rewards/margins": 1.6973552703857422, "rewards/rejected": -1.4958820343017578, "step": 4148 }, { "epoch": 0.48, "learning_rate": 1.5890202504974832e-07, "logits/chosen": -3.1073105335235596, "logits/rejected": -3.1245694160461426, "logps/chosen": -197.14981079101562, "logps/rejected": -161.67953491210938, "loss": 0.2424, "rewards/accuracies": 1.0, "rewards/chosen": 0.05173312872648239, "rewards/margins": 1.8211636543273926, "rewards/rejected": -1.7694306373596191, "step": 4149 }, { "epoch": 0.48, "learning_rate": 1.5886690858012405e-07, "logits/chosen": -3.5924038887023926, "logits/rejected": -3.124065637588501, "logps/chosen": -187.65786743164062, "logps/rejected": -172.00625610351562, "loss": 0.5185, "rewards/accuracies": 0.625, "rewards/chosen": -0.18045081198215485, "rewards/margins": 1.0323083400726318, "rewards/rejected": -1.2127591371536255, "step": 4150 }, { "epoch": 0.48, "learning_rate": 1.5883179211049983e-07, "logits/chosen": -2.971348762512207, "logits/rejected": -3.1058194637298584, "logps/chosen": -206.31903076171875, "logps/rejected": -194.45326232910156, "loss": 0.4181, "rewards/accuracies": 0.75, "rewards/chosen": -0.012494999915361404, "rewards/margins": 0.9520470499992371, "rewards/rejected": -0.964542031288147, "step": 4151 }, { "epoch": 0.48, "learning_rate": 1.5879667564087558e-07, "logits/chosen": -3.608729839324951, "logits/rejected": -3.43735671043396, "logps/chosen": -139.9273223876953, "logps/rejected": -224.5488739013672, "loss": 0.251, "rewards/accuracies": 0.875, "rewards/chosen": 0.494653582572937, "rewards/margins": 1.9678739309310913, "rewards/rejected": -1.4732205867767334, "step": 4152 }, { "epoch": 0.48, "learning_rate": 1.587615591712513e-07, "logits/chosen": -3.377650260925293, "logits/rejected": -3.406541347503662, "logps/chosen": -109.91259002685547, "logps/rejected": -203.92257690429688, "loss": 0.3117, "rewards/accuracies": 0.75, "rewards/chosen": -0.22900398075580597, "rewards/margins": 1.6668479442596436, "rewards/rejected": -1.895851969718933, "step": 4153 }, { "epoch": 0.48, "learning_rate": 1.5872644270162707e-07, "logits/chosen": -2.764587879180908, "logits/rejected": -2.773922920227051, "logps/chosen": -306.7787780761719, "logps/rejected": -230.67587280273438, "loss": 0.7821, "rewards/accuracies": 0.5, "rewards/chosen": -0.6683786511421204, "rewards/margins": 0.7920563220977783, "rewards/rejected": -1.460434913635254, "step": 4154 }, { "epoch": 0.48, "learning_rate": 1.586913262320028e-07, "logits/chosen": -3.078274965286255, "logits/rejected": -3.2264177799224854, "logps/chosen": -264.54168701171875, "logps/rejected": -236.73025512695312, "loss": 0.186, "rewards/accuracies": 1.0, "rewards/chosen": 0.4314913749694824, "rewards/margins": 2.771956443786621, "rewards/rejected": -2.3404650688171387, "step": 4155 }, { "epoch": 0.48, "learning_rate": 1.5865620976237855e-07, "logits/chosen": -4.101260185241699, "logits/rejected": -3.8840391635894775, "logps/chosen": -232.98101806640625, "logps/rejected": -216.6641845703125, "loss": 0.4229, "rewards/accuracies": 0.875, "rewards/chosen": -0.5030287504196167, "rewards/margins": 0.859880805015564, "rewards/rejected": -1.3629095554351807, "step": 4156 }, { "epoch": 0.48, "learning_rate": 1.586210932927543e-07, "logits/chosen": -2.9791393280029297, "logits/rejected": -2.668295383453369, "logps/chosen": -266.3150634765625, "logps/rejected": -311.8343505859375, "loss": 0.7597, "rewards/accuracies": 0.5, "rewards/chosen": -0.03563022240996361, "rewards/margins": 1.127402663230896, "rewards/rejected": -1.16303288936615, "step": 4157 }, { "epoch": 0.48, "learning_rate": 1.5858597682313003e-07, "logits/chosen": -3.125279188156128, "logits/rejected": -3.154484510421753, "logps/chosen": -334.1747741699219, "logps/rejected": -210.64703369140625, "loss": 0.2998, "rewards/accuracies": 0.875, "rewards/chosen": -0.4042523205280304, "rewards/margins": 1.8817850351333618, "rewards/rejected": -2.2860374450683594, "step": 4158 }, { "epoch": 0.48, "learning_rate": 1.5855086035350578e-07, "logits/chosen": -3.1841514110565186, "logits/rejected": -3.193410873413086, "logps/chosen": -318.4084167480469, "logps/rejected": -224.02064514160156, "loss": 0.3649, "rewards/accuracies": 1.0, "rewards/chosen": -0.1609807014465332, "rewards/margins": 1.169592022895813, "rewards/rejected": -1.3305727243423462, "step": 4159 }, { "epoch": 0.48, "learning_rate": 1.5851574388388154e-07, "logits/chosen": -3.733480930328369, "logits/rejected": -3.541449546813965, "logps/chosen": -201.5786895751953, "logps/rejected": -289.6544189453125, "loss": 0.327, "rewards/accuracies": 0.75, "rewards/chosen": -0.4985743463039398, "rewards/margins": 2.755082607269287, "rewards/rejected": -3.2536568641662598, "step": 4160 }, { "epoch": 0.48, "learning_rate": 1.5848062741425726e-07, "logits/chosen": -3.407705783843994, "logits/rejected": -3.589371919631958, "logps/chosen": -188.40481567382812, "logps/rejected": -474.9868469238281, "loss": 0.358, "rewards/accuracies": 0.875, "rewards/chosen": 0.2394881695508957, "rewards/margins": 2.6502137184143066, "rewards/rejected": -2.4107258319854736, "step": 4161 }, { "epoch": 0.48, "learning_rate": 1.5844551094463304e-07, "logits/chosen": -3.035860538482666, "logits/rejected": -2.973696708679199, "logps/chosen": -395.4267578125, "logps/rejected": -490.580810546875, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520816922187805, "rewards/margins": 2.0813684463500977, "rewards/rejected": -2.3334503173828125, "step": 4162 }, { "epoch": 0.48, "learning_rate": 1.5841039447500874e-07, "logits/chosen": -3.364637851715088, "logits/rejected": -3.331660032272339, "logps/chosen": -204.3849334716797, "logps/rejected": -230.2228240966797, "loss": 0.4245, "rewards/accuracies": 0.75, "rewards/chosen": -0.4740438759326935, "rewards/margins": 1.7664529085159302, "rewards/rejected": -2.2404966354370117, "step": 4163 }, { "epoch": 0.48, "learning_rate": 1.5837527800538453e-07, "logits/chosen": -3.995433807373047, "logits/rejected": -3.518481731414795, "logps/chosen": -248.86956787109375, "logps/rejected": -194.94406127929688, "loss": 0.3877, "rewards/accuracies": 0.75, "rewards/chosen": -0.22125881910324097, "rewards/margins": 1.5231809616088867, "rewards/rejected": -1.7444398403167725, "step": 4164 }, { "epoch": 0.48, "learning_rate": 1.5834016153576028e-07, "logits/chosen": -2.671725034713745, "logits/rejected": -2.711016893386841, "logps/chosen": -304.2961120605469, "logps/rejected": -266.02398681640625, "loss": 0.4332, "rewards/accuracies": 0.875, "rewards/chosen": -0.1302916407585144, "rewards/margins": 0.7803993225097656, "rewards/rejected": -0.9106910228729248, "step": 4165 }, { "epoch": 0.48, "learning_rate": 1.58305045066136e-07, "logits/chosen": -3.256270408630371, "logits/rejected": -3.1990599632263184, "logps/chosen": -240.757568359375, "logps/rejected": -277.7171325683594, "loss": 0.2958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1540175974369049, "rewards/margins": 1.7463639974594116, "rewards/rejected": -1.9003815650939941, "step": 4166 }, { "epoch": 0.48, "learning_rate": 1.5826992859651176e-07, "logits/chosen": -3.061842679977417, "logits/rejected": -3.119476795196533, "logps/chosen": -129.54177856445312, "logps/rejected": -194.00811767578125, "loss": 0.5123, "rewards/accuracies": 0.875, "rewards/chosen": 0.028519943356513977, "rewards/margins": 1.5739359855651855, "rewards/rejected": -1.5454161167144775, "step": 4167 }, { "epoch": 0.48, "learning_rate": 1.5823481212688751e-07, "logits/chosen": -3.072685480117798, "logits/rejected": -3.0845108032226562, "logps/chosen": -157.47801208496094, "logps/rejected": -165.497314453125, "loss": 0.5806, "rewards/accuracies": 0.625, "rewards/chosen": -0.4884871244430542, "rewards/margins": 0.7290863990783691, "rewards/rejected": -1.217573642730713, "step": 4168 }, { "epoch": 0.48, "learning_rate": 1.5819969565726324e-07, "logits/chosen": -3.3165225982666016, "logits/rejected": -3.055427074432373, "logps/chosen": -380.9521789550781, "logps/rejected": -222.57113647460938, "loss": 0.5083, "rewards/accuracies": 0.625, "rewards/chosen": -0.5689890384674072, "rewards/margins": 1.333040714263916, "rewards/rejected": -1.9020297527313232, "step": 4169 }, { "epoch": 0.48, "learning_rate": 1.58164579187639e-07, "logits/chosen": -3.436197280883789, "logits/rejected": -3.4974095821380615, "logps/chosen": -211.1973419189453, "logps/rejected": -230.60227966308594, "loss": 0.374, "rewards/accuracies": 0.75, "rewards/chosen": -0.26635798811912537, "rewards/margins": 1.5973553657531738, "rewards/rejected": -1.8637133836746216, "step": 4170 }, { "epoch": 0.48, "learning_rate": 1.5812946271801472e-07, "logits/chosen": -2.789534568786621, "logits/rejected": -2.844447135925293, "logps/chosen": -194.02261352539062, "logps/rejected": -214.2145538330078, "loss": 0.3057, "rewards/accuracies": 0.75, "rewards/chosen": 0.11948111653327942, "rewards/margins": 1.8482369184494019, "rewards/rejected": -1.7287559509277344, "step": 4171 }, { "epoch": 0.48, "learning_rate": 1.5809434624839048e-07, "logits/chosen": -3.237412929534912, "logits/rejected": -3.3599817752838135, "logps/chosen": -123.2769546508789, "logps/rejected": -217.07310485839844, "loss": 0.4538, "rewards/accuracies": 0.875, "rewards/chosen": 0.23284097015857697, "rewards/margins": 1.2071439027786255, "rewards/rejected": -0.9743030071258545, "step": 4172 }, { "epoch": 0.48, "learning_rate": 1.5805922977876626e-07, "logits/chosen": -2.720076560974121, "logits/rejected": -2.866028308868408, "logps/chosen": -278.5878601074219, "logps/rejected": -380.5396728515625, "loss": 0.4431, "rewards/accuracies": 0.875, "rewards/chosen": -0.20925235748291016, "rewards/margins": 0.8312242031097412, "rewards/rejected": -1.0404765605926514, "step": 4173 }, { "epoch": 0.48, "learning_rate": 1.5802411330914196e-07, "logits/chosen": -2.9968390464782715, "logits/rejected": -2.8339056968688965, "logps/chosen": -191.98626708984375, "logps/rejected": -225.85873413085938, "loss": 0.5175, "rewards/accuracies": 0.75, "rewards/chosen": -0.33503589034080505, "rewards/margins": 0.9331789612770081, "rewards/rejected": -1.2682148218154907, "step": 4174 }, { "epoch": 0.48, "learning_rate": 1.5798899683951774e-07, "logits/chosen": -3.2908854484558105, "logits/rejected": -3.1715598106384277, "logps/chosen": -224.91612243652344, "logps/rejected": -214.47206115722656, "loss": 0.4118, "rewards/accuracies": 0.625, "rewards/chosen": -0.005865946412086487, "rewards/margins": 2.2581474781036377, "rewards/rejected": -2.2640132904052734, "step": 4175 }, { "epoch": 0.48, "learning_rate": 1.579538803698935e-07, "logits/chosen": -2.492210865020752, "logits/rejected": -2.613584041595459, "logps/chosen": -315.2838134765625, "logps/rejected": -326.8943786621094, "loss": 0.8461, "rewards/accuracies": 0.375, "rewards/chosen": -0.7833446860313416, "rewards/margins": 0.2049870491027832, "rewards/rejected": -0.98833167552948, "step": 4176 }, { "epoch": 0.48, "learning_rate": 1.5791876390026922e-07, "logits/chosen": -3.206112861633301, "logits/rejected": -3.4229862689971924, "logps/chosen": -169.55886840820312, "logps/rejected": -188.4399871826172, "loss": 0.3536, "rewards/accuracies": 0.75, "rewards/chosen": 0.29534098505973816, "rewards/margins": 2.3685572147369385, "rewards/rejected": -2.073216199874878, "step": 4177 }, { "epoch": 0.48, "learning_rate": 1.5788364743064497e-07, "logits/chosen": -3.458667755126953, "logits/rejected": -3.0830135345458984, "logps/chosen": -111.20146179199219, "logps/rejected": -108.19001770019531, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -0.4490020275115967, "rewards/margins": 0.9449406266212463, "rewards/rejected": -1.3939425945281982, "step": 4178 }, { "epoch": 0.48, "learning_rate": 1.578485309610207e-07, "logits/chosen": -3.9089512825012207, "logits/rejected": -3.9385523796081543, "logps/chosen": -166.7404327392578, "logps/rejected": -170.56027221679688, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": 0.07523494213819504, "rewards/margins": 2.6744654178619385, "rewards/rejected": -2.5992302894592285, "step": 4179 }, { "epoch": 0.48, "learning_rate": 1.5781341449139645e-07, "logits/chosen": -3.507452964782715, "logits/rejected": -3.558997392654419, "logps/chosen": -254.794189453125, "logps/rejected": -274.3380126953125, "loss": 0.1371, "rewards/accuracies": 1.0, "rewards/chosen": 0.2245100736618042, "rewards/margins": 3.1066999435424805, "rewards/rejected": -2.882189989089966, "step": 4180 }, { "epoch": 0.48, "learning_rate": 1.577782980217722e-07, "logits/chosen": -3.2125909328460693, "logits/rejected": -3.280500650405884, "logps/chosen": -188.98243713378906, "logps/rejected": -212.83255004882812, "loss": 0.3226, "rewards/accuracies": 0.875, "rewards/chosen": 0.15744900703430176, "rewards/margins": 2.0082292556762695, "rewards/rejected": -1.8507802486419678, "step": 4181 }, { "epoch": 0.48, "learning_rate": 1.5774318155214794e-07, "logits/chosen": -2.36220645904541, "logits/rejected": -2.720154285430908, "logps/chosen": -212.7046356201172, "logps/rejected": -242.90753173828125, "loss": 0.399, "rewards/accuracies": 0.875, "rewards/chosen": 0.18542778491973877, "rewards/margins": 2.3256349563598633, "rewards/rejected": -2.140207290649414, "step": 4182 }, { "epoch": 0.48, "learning_rate": 1.577080650825237e-07, "logits/chosen": -2.5293242931365967, "logits/rejected": -2.6789298057556152, "logps/chosen": -131.03353881835938, "logps/rejected": -193.84263610839844, "loss": 0.365, "rewards/accuracies": 0.875, "rewards/chosen": 0.31866520643234253, "rewards/margins": 1.1562637090682983, "rewards/rejected": -0.8375985026359558, "step": 4183 }, { "epoch": 0.48, "learning_rate": 1.5767294861289947e-07, "logits/chosen": -2.7968947887420654, "logits/rejected": -2.501553535461426, "logps/chosen": -210.76560974121094, "logps/rejected": -813.0324096679688, "loss": 0.243, "rewards/accuracies": 0.875, "rewards/chosen": 0.01450406014919281, "rewards/margins": 3.042210578918457, "rewards/rejected": -3.0277066230773926, "step": 4184 }, { "epoch": 0.48, "learning_rate": 1.576378321432752e-07, "logits/chosen": -2.9570178985595703, "logits/rejected": -3.3056976795196533, "logps/chosen": -355.7897033691406, "logps/rejected": -413.2002258300781, "loss": 0.1839, "rewards/accuracies": 0.875, "rewards/chosen": 0.26672905683517456, "rewards/margins": 3.47041654586792, "rewards/rejected": -3.2036876678466797, "step": 4185 }, { "epoch": 0.48, "learning_rate": 1.5760271567365095e-07, "logits/chosen": -3.0302488803863525, "logits/rejected": -2.9772324562072754, "logps/chosen": -326.79864501953125, "logps/rejected": -260.8951416015625, "loss": 0.3762, "rewards/accuracies": 0.875, "rewards/chosen": -0.2330956757068634, "rewards/margins": 1.180436134338379, "rewards/rejected": -1.4135316610336304, "step": 4186 }, { "epoch": 0.48, "learning_rate": 1.5756759920402668e-07, "logits/chosen": -3.4028916358947754, "logits/rejected": -3.542877674102783, "logps/chosen": -123.42025756835938, "logps/rejected": -216.08273315429688, "loss": 0.388, "rewards/accuracies": 0.75, "rewards/chosen": -0.11894366145133972, "rewards/margins": 1.5408967733383179, "rewards/rejected": -1.65984046459198, "step": 4187 }, { "epoch": 0.48, "learning_rate": 1.5753248273440243e-07, "logits/chosen": -3.4626107215881348, "logits/rejected": -3.3268773555755615, "logps/chosen": -191.43275451660156, "logps/rejected": -182.3699493408203, "loss": 0.4003, "rewards/accuracies": 0.875, "rewards/chosen": 0.1424209475517273, "rewards/margins": 1.4959142208099365, "rewards/rejected": -1.353493332862854, "step": 4188 }, { "epoch": 0.48, "learning_rate": 1.5749736626477819e-07, "logits/chosen": -2.8813586235046387, "logits/rejected": -2.739018440246582, "logps/chosen": -288.30352783203125, "logps/rejected": -290.74310302734375, "loss": 0.5272, "rewards/accuracies": 0.625, "rewards/chosen": -0.2124200463294983, "rewards/margins": 1.4833290576934814, "rewards/rejected": -1.695749282836914, "step": 4189 }, { "epoch": 0.48, "learning_rate": 1.5746224979515391e-07, "logits/chosen": -3.558523654937744, "logits/rejected": -3.307265281677246, "logps/chosen": -165.4458465576172, "logps/rejected": -186.44650268554688, "loss": 0.3248, "rewards/accuracies": 0.875, "rewards/chosen": 0.08462736010551453, "rewards/margins": 1.5774463415145874, "rewards/rejected": -1.492818832397461, "step": 4190 }, { "epoch": 0.48, "learning_rate": 1.5742713332552967e-07, "logits/chosen": -3.555126428604126, "logits/rejected": -3.742110252380371, "logps/chosen": -199.5130615234375, "logps/rejected": -258.3553466796875, "loss": 0.5752, "rewards/accuracies": 0.75, "rewards/chosen": -0.4030669927597046, "rewards/margins": 0.862259030342102, "rewards/rejected": -1.2653260231018066, "step": 4191 }, { "epoch": 0.48, "learning_rate": 1.573920168559054e-07, "logits/chosen": -3.0914368629455566, "logits/rejected": -3.417205572128296, "logps/chosen": -289.152587890625, "logps/rejected": -362.5640869140625, "loss": 0.4299, "rewards/accuracies": 0.75, "rewards/chosen": -0.6245719790458679, "rewards/margins": 1.49722421169281, "rewards/rejected": -2.121796131134033, "step": 4192 }, { "epoch": 0.48, "learning_rate": 1.5735690038628115e-07, "logits/chosen": -2.8086109161376953, "logits/rejected": -3.094818115234375, "logps/chosen": -154.9805908203125, "logps/rejected": -219.42086791992188, "loss": 0.3281, "rewards/accuracies": 0.75, "rewards/chosen": 0.7028059959411621, "rewards/margins": 2.508117198944092, "rewards/rejected": -1.8053112030029297, "step": 4193 }, { "epoch": 0.48, "learning_rate": 1.573217839166569e-07, "logits/chosen": -3.255518913269043, "logits/rejected": -3.0628411769866943, "logps/chosen": -431.36981201171875, "logps/rejected": -319.5850524902344, "loss": 0.413, "rewards/accuracies": 0.625, "rewards/chosen": -0.7073135375976562, "rewards/margins": 1.132576584815979, "rewards/rejected": -1.8398902416229248, "step": 4194 }, { "epoch": 0.48, "learning_rate": 1.5728666744703263e-07, "logits/chosen": -3.179767608642578, "logits/rejected": -2.9064841270446777, "logps/chosen": -230.9851837158203, "logps/rejected": -145.33529663085938, "loss": 1.3289, "rewards/accuracies": 0.875, "rewards/chosen": -1.3804271221160889, "rewards/margins": 0.5264627933502197, "rewards/rejected": -1.9068900346755981, "step": 4195 }, { "epoch": 0.48, "learning_rate": 1.572515509774084e-07, "logits/chosen": -3.006401300430298, "logits/rejected": -3.1342170238494873, "logps/chosen": -96.12712860107422, "logps/rejected": -139.85797119140625, "loss": 0.6567, "rewards/accuracies": 0.875, "rewards/chosen": -0.6084538102149963, "rewards/margins": 1.7247674465179443, "rewards/rejected": -2.333221197128296, "step": 4196 }, { "epoch": 0.48, "learning_rate": 1.5721643450778416e-07, "logits/chosen": -3.9080986976623535, "logits/rejected": -3.6181583404541016, "logps/chosen": -158.03302001953125, "logps/rejected": -166.3197479248047, "loss": 0.4082, "rewards/accuracies": 0.875, "rewards/chosen": -0.6031032800674438, "rewards/margins": 1.2207880020141602, "rewards/rejected": -1.8238911628723145, "step": 4197 }, { "epoch": 0.48, "learning_rate": 1.571813180381599e-07, "logits/chosen": -3.410707950592041, "logits/rejected": -3.3439064025878906, "logps/chosen": -236.82763671875, "logps/rejected": -151.862060546875, "loss": 0.2345, "rewards/accuracies": 1.0, "rewards/chosen": 0.19808907806873322, "rewards/margins": 2.4813783168792725, "rewards/rejected": -2.2832894325256348, "step": 4198 }, { "epoch": 0.48, "learning_rate": 1.5714620156853565e-07, "logits/chosen": -2.9063782691955566, "logits/rejected": -3.174765110015869, "logps/chosen": -214.12179565429688, "logps/rejected": -355.12481689453125, "loss": 0.3639, "rewards/accuracies": 0.75, "rewards/chosen": -0.11183272302150726, "rewards/margins": 2.061330795288086, "rewards/rejected": -2.1731631755828857, "step": 4199 }, { "epoch": 0.48, "learning_rate": 1.5711108509891137e-07, "logits/chosen": -2.501115322113037, "logits/rejected": -2.9578757286071777, "logps/chosen": -199.41876220703125, "logps/rejected": -266.54217529296875, "loss": 0.241, "rewards/accuracies": 0.875, "rewards/chosen": 0.04883028566837311, "rewards/margins": 2.872514247894287, "rewards/rejected": -2.8236842155456543, "step": 4200 }, { "epoch": 0.48, "learning_rate": 1.5707596862928713e-07, "logits/chosen": -3.7064433097839355, "logits/rejected": -3.3666961193084717, "logps/chosen": -447.16461181640625, "logps/rejected": -335.2342529296875, "loss": 0.3885, "rewards/accuracies": 0.75, "rewards/chosen": -0.0724840983748436, "rewards/margins": 2.597301959991455, "rewards/rejected": -2.669785737991333, "step": 4201 }, { "epoch": 0.48, "learning_rate": 1.5704085215966288e-07, "logits/chosen": -3.25488018989563, "logits/rejected": -3.167314291000366, "logps/chosen": -306.48760986328125, "logps/rejected": -220.80726623535156, "loss": 0.3166, "rewards/accuracies": 0.875, "rewards/chosen": 0.2944314479827881, "rewards/margins": 1.742204189300537, "rewards/rejected": -1.4477726221084595, "step": 4202 }, { "epoch": 0.48, "learning_rate": 1.570057356900386e-07, "logits/chosen": -2.9310877323150635, "logits/rejected": -3.0971431732177734, "logps/chosen": -223.73374938964844, "logps/rejected": -331.232421875, "loss": 0.5306, "rewards/accuracies": 0.875, "rewards/chosen": -0.023408517241477966, "rewards/margins": 1.2062346935272217, "rewards/rejected": -1.2296431064605713, "step": 4203 }, { "epoch": 0.48, "learning_rate": 1.5697061922041436e-07, "logits/chosen": -3.45886492729187, "logits/rejected": -3.359304189682007, "logps/chosen": -211.30303955078125, "logps/rejected": -229.62911987304688, "loss": 0.3257, "rewards/accuracies": 0.875, "rewards/chosen": 0.12690195441246033, "rewards/margins": 1.5767028331756592, "rewards/rejected": -1.449800729751587, "step": 4204 }, { "epoch": 0.48, "learning_rate": 1.5693550275079012e-07, "logits/chosen": -2.709913492202759, "logits/rejected": -2.496840000152588, "logps/chosen": -311.2287902832031, "logps/rejected": -232.8653564453125, "loss": 0.5268, "rewards/accuracies": 0.625, "rewards/chosen": -0.058904170989990234, "rewards/margins": 0.9304572343826294, "rewards/rejected": -0.9893614053726196, "step": 4205 }, { "epoch": 0.48, "learning_rate": 1.5690038628116584e-07, "logits/chosen": -2.466404914855957, "logits/rejected": -2.4961793422698975, "logps/chosen": -350.48828125, "logps/rejected": -313.3509521484375, "loss": 0.4498, "rewards/accuracies": 0.75, "rewards/chosen": -0.5067083239555359, "rewards/margins": 1.4223328828811646, "rewards/rejected": -1.9290411472320557, "step": 4206 }, { "epoch": 0.48, "learning_rate": 1.5686526981154162e-07, "logits/chosen": -2.6970109939575195, "logits/rejected": -2.7534914016723633, "logps/chosen": -293.1230163574219, "logps/rejected": -260.654296875, "loss": 0.7455, "rewards/accuracies": 0.75, "rewards/chosen": -0.7609258890151978, "rewards/margins": 2.2377963066101074, "rewards/rejected": -2.9987223148345947, "step": 4207 }, { "epoch": 0.49, "learning_rate": 1.5683015334191732e-07, "logits/chosen": -3.0272185802459717, "logits/rejected": -3.288188934326172, "logps/chosen": -286.25213623046875, "logps/rejected": -256.9794616699219, "loss": 0.1881, "rewards/accuracies": 1.0, "rewards/chosen": 0.20232467353343964, "rewards/margins": 2.6620264053344727, "rewards/rejected": -2.4597017765045166, "step": 4208 }, { "epoch": 0.49, "learning_rate": 1.567950368722931e-07, "logits/chosen": -2.322103500366211, "logits/rejected": -2.1082992553710938, "logps/chosen": -245.9691925048828, "logps/rejected": -201.75152587890625, "loss": 0.5244, "rewards/accuracies": 0.75, "rewards/chosen": -0.06403293460607529, "rewards/margins": 0.6996648907661438, "rewards/rejected": -0.7636978626251221, "step": 4209 }, { "epoch": 0.49, "learning_rate": 1.5675992040266886e-07, "logits/chosen": -2.707446575164795, "logits/rejected": -2.630547523498535, "logps/chosen": -443.3980407714844, "logps/rejected": -383.24945068359375, "loss": 0.4298, "rewards/accuracies": 0.75, "rewards/chosen": 0.7467562556266785, "rewards/margins": 0.8330624103546143, "rewards/rejected": -0.08630618453025818, "step": 4210 }, { "epoch": 0.49, "learning_rate": 1.5672480393304459e-07, "logits/chosen": -3.244673252105713, "logits/rejected": -2.7307262420654297, "logps/chosen": -303.138671875, "logps/rejected": -168.0112762451172, "loss": 0.2167, "rewards/accuracies": 0.875, "rewards/chosen": 0.37710079550743103, "rewards/margins": 2.1766321659088135, "rewards/rejected": -1.7995314598083496, "step": 4211 }, { "epoch": 0.49, "learning_rate": 1.5668968746342034e-07, "logits/chosen": -2.636540412902832, "logits/rejected": -2.874347686767578, "logps/chosen": -429.65777587890625, "logps/rejected": -254.24232482910156, "loss": 0.6646, "rewards/accuracies": 0.75, "rewards/chosen": -0.12156897783279419, "rewards/margins": 0.8506889343261719, "rewards/rejected": -0.9722579717636108, "step": 4212 }, { "epoch": 0.49, "learning_rate": 1.566545709937961e-07, "logits/chosen": -3.42804217338562, "logits/rejected": -3.604773998260498, "logps/chosen": -142.44583129882812, "logps/rejected": -176.04896545410156, "loss": 0.5046, "rewards/accuracies": 0.625, "rewards/chosen": 0.18585234880447388, "rewards/margins": 1.3978816270828247, "rewards/rejected": -1.212029218673706, "step": 4213 }, { "epoch": 0.49, "learning_rate": 1.5661945452417182e-07, "logits/chosen": -3.365210771560669, "logits/rejected": -3.182372570037842, "logps/chosen": -299.1852722167969, "logps/rejected": -220.96194458007812, "loss": 0.5082, "rewards/accuracies": 0.75, "rewards/chosen": -0.4923288822174072, "rewards/margins": 0.9294145107269287, "rewards/rejected": -1.421743392944336, "step": 4214 }, { "epoch": 0.49, "learning_rate": 1.5658433805454757e-07, "logits/chosen": -2.574521780014038, "logits/rejected": -2.6343436241149902, "logps/chosen": -559.0587158203125, "logps/rejected": -380.2601623535156, "loss": 0.5195, "rewards/accuracies": 0.75, "rewards/chosen": 0.2671915292739868, "rewards/margins": 0.817820131778717, "rewards/rejected": -0.5506286025047302, "step": 4215 }, { "epoch": 0.49, "learning_rate": 1.565492215849233e-07, "logits/chosen": -3.251523971557617, "logits/rejected": -3.603330612182617, "logps/chosen": -231.7414093017578, "logps/rejected": -213.80653381347656, "loss": 0.1922, "rewards/accuracies": 1.0, "rewards/chosen": -0.16383808851242065, "rewards/margins": 1.8391492366790771, "rewards/rejected": -2.0029873847961426, "step": 4216 }, { "epoch": 0.49, "learning_rate": 1.5651410511529906e-07, "logits/chosen": -3.026132106781006, "logits/rejected": -2.8859915733337402, "logps/chosen": -144.35787963867188, "logps/rejected": -156.07640075683594, "loss": 0.605, "rewards/accuracies": 0.625, "rewards/chosen": -0.47343769669532776, "rewards/margins": 1.0828468799591064, "rewards/rejected": -1.5562846660614014, "step": 4217 }, { "epoch": 0.49, "learning_rate": 1.5647898864567484e-07, "logits/chosen": -3.53056263923645, "logits/rejected": -3.6490373611450195, "logps/chosen": -154.6624298095703, "logps/rejected": -172.98361206054688, "loss": 0.4218, "rewards/accuracies": 0.75, "rewards/chosen": -0.7092382907867432, "rewards/margins": 1.244614601135254, "rewards/rejected": -1.953852891921997, "step": 4218 }, { "epoch": 0.49, "learning_rate": 1.5644387217605056e-07, "logits/chosen": -3.7218546867370605, "logits/rejected": -3.6570980548858643, "logps/chosen": -344.14385986328125, "logps/rejected": -243.33856201171875, "loss": 0.2545, "rewards/accuracies": 0.875, "rewards/chosen": 0.2171463519334793, "rewards/margins": 2.0274670124053955, "rewards/rejected": -1.8103206157684326, "step": 4219 }, { "epoch": 0.49, "learning_rate": 1.5640875570642632e-07, "logits/chosen": -3.8679966926574707, "logits/rejected": -3.387474536895752, "logps/chosen": -242.87124633789062, "logps/rejected": -183.67166137695312, "loss": 0.6652, "rewards/accuracies": 0.625, "rewards/chosen": -0.8209249377250671, "rewards/margins": 0.864315390586853, "rewards/rejected": -1.6852402687072754, "step": 4220 }, { "epoch": 0.49, "learning_rate": 1.5637363923680207e-07, "logits/chosen": -3.067713737487793, "logits/rejected": -2.9026548862457275, "logps/chosen": -159.93829345703125, "logps/rejected": -169.57919311523438, "loss": 0.6137, "rewards/accuracies": 0.5, "rewards/chosen": -0.6820780038833618, "rewards/margins": 0.3177064061164856, "rewards/rejected": -0.9997844099998474, "step": 4221 }, { "epoch": 0.49, "learning_rate": 1.563385227671778e-07, "logits/chosen": -3.250931978225708, "logits/rejected": -2.9872829914093018, "logps/chosen": -277.15509033203125, "logps/rejected": -322.9438781738281, "loss": 0.6291, "rewards/accuracies": 0.75, "rewards/chosen": -0.14678996801376343, "rewards/margins": 1.599380373954773, "rewards/rejected": -1.7461705207824707, "step": 4222 }, { "epoch": 0.49, "learning_rate": 1.5630340629755355e-07, "logits/chosen": -2.6062636375427246, "logits/rejected": -2.682535409927368, "logps/chosen": -197.87490844726562, "logps/rejected": -384.34246826171875, "loss": 0.3425, "rewards/accuracies": 0.75, "rewards/chosen": 0.17498986423015594, "rewards/margins": 2.6025538444519043, "rewards/rejected": -2.4275639057159424, "step": 4223 }, { "epoch": 0.49, "learning_rate": 1.5626828982792928e-07, "logits/chosen": -2.9401419162750244, "logits/rejected": -2.899386405944824, "logps/chosen": -420.2550354003906, "logps/rejected": -417.41424560546875, "loss": 0.2129, "rewards/accuracies": 1.0, "rewards/chosen": 0.16950300335884094, "rewards/margins": 2.0824363231658936, "rewards/rejected": -1.9129332304000854, "step": 4224 }, { "epoch": 0.49, "learning_rate": 1.5623317335830503e-07, "logits/chosen": -2.8833069801330566, "logits/rejected": -3.197333335876465, "logps/chosen": -271.4185791015625, "logps/rejected": -166.21835327148438, "loss": 0.4951, "rewards/accuracies": 0.75, "rewards/chosen": -0.09382620453834534, "rewards/margins": 1.207953929901123, "rewards/rejected": -1.3017802238464355, "step": 4225 }, { "epoch": 0.49, "learning_rate": 1.561980568886808e-07, "logits/chosen": -2.993009090423584, "logits/rejected": -2.9859085083007812, "logps/chosen": -377.30682373046875, "logps/rejected": -305.17755126953125, "loss": 0.4752, "rewards/accuracies": 0.875, "rewards/chosen": -0.14905662834644318, "rewards/margins": 1.534812569618225, "rewards/rejected": -1.6838691234588623, "step": 4226 }, { "epoch": 0.49, "learning_rate": 1.5616294041905652e-07, "logits/chosen": -3.4217991828918457, "logits/rejected": -3.350095510482788, "logps/chosen": -172.26104736328125, "logps/rejected": -200.93963623046875, "loss": 0.2791, "rewards/accuracies": 0.875, "rewards/chosen": 0.4336477518081665, "rewards/margins": 1.4851210117340088, "rewards/rejected": -1.0514732599258423, "step": 4227 }, { "epoch": 0.49, "learning_rate": 1.5612782394943227e-07, "logits/chosen": -3.4234859943389893, "logits/rejected": -3.6478309631347656, "logps/chosen": -355.0272521972656, "logps/rejected": -292.01788330078125, "loss": 0.615, "rewards/accuracies": 0.75, "rewards/chosen": -0.7134689688682556, "rewards/margins": 1.1417183876037598, "rewards/rejected": -1.8551874160766602, "step": 4228 }, { "epoch": 0.49, "learning_rate": 1.5609270747980805e-07, "logits/chosen": -3.3391733169555664, "logits/rejected": -3.241987705230713, "logps/chosen": -240.895263671875, "logps/rejected": -240.29330444335938, "loss": 0.4052, "rewards/accuracies": 0.875, "rewards/chosen": -0.2518627345561981, "rewards/margins": 0.9093825221061707, "rewards/rejected": -1.1612452268600464, "step": 4229 }, { "epoch": 0.49, "learning_rate": 1.5605759101018378e-07, "logits/chosen": -3.732632637023926, "logits/rejected": -3.5530200004577637, "logps/chosen": -268.4189758300781, "logps/rejected": -181.23297119140625, "loss": 0.4128, "rewards/accuracies": 0.75, "rewards/chosen": -0.46662265062332153, "rewards/margins": 2.0227432250976562, "rewards/rejected": -2.489366054534912, "step": 4230 }, { "epoch": 0.49, "learning_rate": 1.5602247454055953e-07, "logits/chosen": -2.5775036811828613, "logits/rejected": -2.691265106201172, "logps/chosen": -398.1290283203125, "logps/rejected": -354.6471252441406, "loss": 0.4892, "rewards/accuracies": 0.875, "rewards/chosen": 0.16386035084724426, "rewards/margins": 0.906028687953949, "rewards/rejected": -0.7421683669090271, "step": 4231 }, { "epoch": 0.49, "learning_rate": 1.5598735807093526e-07, "logits/chosen": -3.540642738342285, "logits/rejected": -3.715785026550293, "logps/chosen": -176.8072509765625, "logps/rejected": -174.71444702148438, "loss": 0.3515, "rewards/accuracies": 1.0, "rewards/chosen": -0.5884497761726379, "rewards/margins": 2.1111292839050293, "rewards/rejected": -2.6995792388916016, "step": 4232 }, { "epoch": 0.49, "learning_rate": 1.55952241601311e-07, "logits/chosen": -3.19585919380188, "logits/rejected": -3.176499605178833, "logps/chosen": -318.3712158203125, "logps/rejected": -414.4798583984375, "loss": 0.5015, "rewards/accuracies": 0.75, "rewards/chosen": -0.2189367413520813, "rewards/margins": 2.98225998878479, "rewards/rejected": -3.2011966705322266, "step": 4233 }, { "epoch": 0.49, "learning_rate": 1.5591712513168677e-07, "logits/chosen": -3.364260673522949, "logits/rejected": -3.1206307411193848, "logps/chosen": -189.42726135253906, "logps/rejected": -154.1235809326172, "loss": 0.3679, "rewards/accuracies": 0.875, "rewards/chosen": -0.18897829949855804, "rewards/margins": 1.0639965534210205, "rewards/rejected": -1.2529747486114502, "step": 4234 }, { "epoch": 0.49, "learning_rate": 1.558820086620625e-07, "logits/chosen": -3.2002058029174805, "logits/rejected": -2.9947011470794678, "logps/chosen": -411.40997314453125, "logps/rejected": -274.5831604003906, "loss": 0.2626, "rewards/accuracies": 0.75, "rewards/chosen": -0.4559458792209625, "rewards/margins": 3.0677223205566406, "rewards/rejected": -3.5236682891845703, "step": 4235 }, { "epoch": 0.49, "learning_rate": 1.5584689219243825e-07, "logits/chosen": -3.203077554702759, "logits/rejected": -3.1748199462890625, "logps/chosen": -182.33050537109375, "logps/rejected": -198.01498413085938, "loss": 0.3675, "rewards/accuracies": 0.875, "rewards/chosen": 0.48663392663002014, "rewards/margins": 1.2618520259857178, "rewards/rejected": -0.7752181887626648, "step": 4236 }, { "epoch": 0.49, "learning_rate": 1.5581177572281397e-07, "logits/chosen": -3.6511764526367188, "logits/rejected": -3.474350690841675, "logps/chosen": -256.4237060546875, "logps/rejected": -304.7333068847656, "loss": 0.461, "rewards/accuracies": 0.75, "rewards/chosen": -0.2401297390460968, "rewards/margins": 1.5414609909057617, "rewards/rejected": -1.7815905809402466, "step": 4237 }, { "epoch": 0.49, "learning_rate": 1.5577665925318973e-07, "logits/chosen": -3.169029712677002, "logits/rejected": -3.134168863296509, "logps/chosen": -276.2920837402344, "logps/rejected": -174.845703125, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": 0.7510141730308533, "rewards/margins": 2.3454251289367676, "rewards/rejected": -1.594411015510559, "step": 4238 }, { "epoch": 0.49, "learning_rate": 1.5574154278356548e-07, "logits/chosen": -3.0075979232788086, "logits/rejected": -3.0985002517700195, "logps/chosen": -139.8168487548828, "logps/rejected": -218.863525390625, "loss": 0.3281, "rewards/accuracies": 0.75, "rewards/chosen": -0.4088268280029297, "rewards/margins": 1.648471474647522, "rewards/rejected": -2.057298421859741, "step": 4239 }, { "epoch": 0.49, "learning_rate": 1.557064263139412e-07, "logits/chosen": -3.7345809936523438, "logits/rejected": -3.436410427093506, "logps/chosen": -186.59226989746094, "logps/rejected": -169.72384643554688, "loss": 0.4119, "rewards/accuracies": 0.75, "rewards/chosen": -0.03154118359088898, "rewards/margins": 1.0449697971343994, "rewards/rejected": -1.076511025428772, "step": 4240 }, { "epoch": 0.49, "learning_rate": 1.55671309844317e-07, "logits/chosen": -3.218388080596924, "logits/rejected": -3.270371198654175, "logps/chosen": -304.01513671875, "logps/rejected": -222.3992462158203, "loss": 0.2911, "rewards/accuracies": 0.875, "rewards/chosen": 0.29795411229133606, "rewards/margins": 1.6727856397628784, "rewards/rejected": -1.3748315572738647, "step": 4241 }, { "epoch": 0.49, "learning_rate": 1.5563619337469274e-07, "logits/chosen": -3.5634844303131104, "logits/rejected": -3.6227400302886963, "logps/chosen": -270.78521728515625, "logps/rejected": -431.0711364746094, "loss": 0.2283, "rewards/accuracies": 0.875, "rewards/chosen": 0.845138430595398, "rewards/margins": 3.4672791957855225, "rewards/rejected": -2.622140884399414, "step": 4242 }, { "epoch": 0.49, "learning_rate": 1.5560107690506847e-07, "logits/chosen": -3.5754261016845703, "logits/rejected": -3.3849782943725586, "logps/chosen": -458.4036560058594, "logps/rejected": -357.0942077636719, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": 0.8364957571029663, "rewards/margins": 3.2704739570617676, "rewards/rejected": -2.4339780807495117, "step": 4243 }, { "epoch": 0.49, "learning_rate": 1.5556596043544422e-07, "logits/chosen": -3.0007286071777344, "logits/rejected": -3.0706701278686523, "logps/chosen": -220.73667907714844, "logps/rejected": -197.514404296875, "loss": 0.4157, "rewards/accuracies": 0.875, "rewards/chosen": 0.3154059946537018, "rewards/margins": 1.0341111421585083, "rewards/rejected": -0.7187052369117737, "step": 4244 }, { "epoch": 0.49, "learning_rate": 1.5553084396581995e-07, "logits/chosen": -3.0850510597229004, "logits/rejected": -3.114885091781616, "logps/chosen": -313.07269287109375, "logps/rejected": -372.5010986328125, "loss": 0.6345, "rewards/accuracies": 0.5, "rewards/chosen": -0.5051722526550293, "rewards/margins": 0.9299687743186951, "rewards/rejected": -1.4351409673690796, "step": 4245 }, { "epoch": 0.49, "learning_rate": 1.554957274961957e-07, "logits/chosen": -3.0106728076934814, "logits/rejected": -2.982508897781372, "logps/chosen": -285.2098083496094, "logps/rejected": -291.25421142578125, "loss": 0.225, "rewards/accuracies": 0.875, "rewards/chosen": 0.39871105551719666, "rewards/margins": 2.478614330291748, "rewards/rejected": -2.0799033641815186, "step": 4246 }, { "epoch": 0.49, "learning_rate": 1.5546061102657146e-07, "logits/chosen": -2.9142961502075195, "logits/rejected": -2.661449670791626, "logps/chosen": -473.8479309082031, "logps/rejected": -259.98773193359375, "loss": 0.3156, "rewards/accuracies": 0.75, "rewards/chosen": 0.23213748633861542, "rewards/margins": 1.9801653623580933, "rewards/rejected": -1.7480278015136719, "step": 4247 }, { "epoch": 0.49, "learning_rate": 1.554254945569472e-07, "logits/chosen": -3.015434980392456, "logits/rejected": -2.4764256477355957, "logps/chosen": -394.7430419921875, "logps/rejected": -302.72711181640625, "loss": 0.3018, "rewards/accuracies": 0.875, "rewards/chosen": 0.25437530875205994, "rewards/margins": 2.9203319549560547, "rewards/rejected": -2.665956497192383, "step": 4248 }, { "epoch": 0.49, "learning_rate": 1.5539037808732294e-07, "logits/chosen": -3.2985901832580566, "logits/rejected": -3.5631816387176514, "logps/chosen": -313.31622314453125, "logps/rejected": -276.43011474609375, "loss": 0.6255, "rewards/accuracies": 0.5, "rewards/chosen": -0.12556932866573334, "rewards/margins": 1.2440885305404663, "rewards/rejected": -1.3696579933166504, "step": 4249 }, { "epoch": 0.49, "learning_rate": 1.553552616176987e-07, "logits/chosen": -2.2757298946380615, "logits/rejected": -2.2371432781219482, "logps/chosen": -396.76214599609375, "logps/rejected": -314.0649719238281, "loss": 0.4346, "rewards/accuracies": 0.75, "rewards/chosen": -0.11369448155164719, "rewards/margins": 1.3494445085525513, "rewards/rejected": -1.4631389379501343, "step": 4250 }, { "epoch": 0.49, "learning_rate": 1.5532014514807442e-07, "logits/chosen": -3.2242932319641113, "logits/rejected": -3.4841742515563965, "logps/chosen": -177.48495483398438, "logps/rejected": -169.81857299804688, "loss": 0.507, "rewards/accuracies": 0.875, "rewards/chosen": -0.5749505758285522, "rewards/margins": 1.4896697998046875, "rewards/rejected": -2.06462025642395, "step": 4251 }, { "epoch": 0.49, "learning_rate": 1.552850286784502e-07, "logits/chosen": -2.6729962825775146, "logits/rejected": -2.699465274810791, "logps/chosen": -265.9630432128906, "logps/rejected": -268.8183288574219, "loss": 0.4266, "rewards/accuracies": 0.75, "rewards/chosen": -0.07760922610759735, "rewards/margins": 2.1892967224121094, "rewards/rejected": -2.2669060230255127, "step": 4252 }, { "epoch": 0.49, "learning_rate": 1.5524991220882593e-07, "logits/chosen": -3.195957660675049, "logits/rejected": -3.351248264312744, "logps/chosen": -269.3422546386719, "logps/rejected": -295.34222412109375, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.27985018491744995, "rewards/margins": 2.2151968479156494, "rewards/rejected": -1.9353466033935547, "step": 4253 }, { "epoch": 0.49, "learning_rate": 1.5521479573920168e-07, "logits/chosen": -2.72761869430542, "logits/rejected": -2.7936758995056152, "logps/chosen": -170.69908142089844, "logps/rejected": -299.31597900390625, "loss": 0.4097, "rewards/accuracies": 0.75, "rewards/chosen": -0.44829198718070984, "rewards/margins": 2.8198447227478027, "rewards/rejected": -3.268136739730835, "step": 4254 }, { "epoch": 0.49, "learning_rate": 1.5517967926957744e-07, "logits/chosen": -3.437605381011963, "logits/rejected": -3.6807217597961426, "logps/chosen": -250.77328491210938, "logps/rejected": -272.0954284667969, "loss": 0.5243, "rewards/accuracies": 0.75, "rewards/chosen": -0.38885074853897095, "rewards/margins": 1.0766921043395996, "rewards/rejected": -1.4655427932739258, "step": 4255 }, { "epoch": 0.49, "learning_rate": 1.5514456279995317e-07, "logits/chosen": -3.5275001525878906, "logits/rejected": -3.3601009845733643, "logps/chosen": -513.3865356445312, "logps/rejected": -251.1932830810547, "loss": 0.2872, "rewards/accuracies": 0.875, "rewards/chosen": 0.10246928036212921, "rewards/margins": 1.4779465198516846, "rewards/rejected": -1.3754773139953613, "step": 4256 }, { "epoch": 0.49, "learning_rate": 1.5510944633032892e-07, "logits/chosen": -3.6029481887817383, "logits/rejected": -3.7882893085479736, "logps/chosen": -192.38475036621094, "logps/rejected": -277.5639343261719, "loss": 0.3912, "rewards/accuracies": 0.75, "rewards/chosen": -0.3484286069869995, "rewards/margins": 1.4554061889648438, "rewards/rejected": -1.8038346767425537, "step": 4257 }, { "epoch": 0.49, "learning_rate": 1.5507432986070467e-07, "logits/chosen": -2.7120699882507324, "logits/rejected": -2.444258689880371, "logps/chosen": -300.184326171875, "logps/rejected": -321.5472106933594, "loss": 0.286, "rewards/accuracies": 0.875, "rewards/chosen": 0.23452740907669067, "rewards/margins": 1.508455514907837, "rewards/rejected": -1.273928165435791, "step": 4258 }, { "epoch": 0.49, "learning_rate": 1.550392133910804e-07, "logits/chosen": -3.350231647491455, "logits/rejected": -3.3053174018859863, "logps/chosen": -189.99618530273438, "logps/rejected": -314.37884521484375, "loss": 0.2471, "rewards/accuracies": 0.875, "rewards/chosen": -0.02450786530971527, "rewards/margins": 3.134544849395752, "rewards/rejected": -3.159052610397339, "step": 4259 }, { "epoch": 0.49, "learning_rate": 1.5500409692145615e-07, "logits/chosen": -2.9293618202209473, "logits/rejected": -3.2945683002471924, "logps/chosen": -324.18304443359375, "logps/rejected": -179.22003173828125, "loss": 0.3123, "rewards/accuracies": 0.75, "rewards/chosen": 0.24158847332000732, "rewards/margins": 2.1195404529571533, "rewards/rejected": -1.877951979637146, "step": 4260 }, { "epoch": 0.49, "learning_rate": 1.5496898045183188e-07, "logits/chosen": -2.4225058555603027, "logits/rejected": -2.681544303894043, "logps/chosen": -364.311279296875, "logps/rejected": -360.9361572265625, "loss": 0.5676, "rewards/accuracies": 0.625, "rewards/chosen": -0.7741743326187134, "rewards/margins": 0.9486208558082581, "rewards/rejected": -1.7227951288223267, "step": 4261 }, { "epoch": 0.49, "learning_rate": 1.5493386398220764e-07, "logits/chosen": -3.041181802749634, "logits/rejected": -3.151578903198242, "logps/chosen": -365.9093017578125, "logps/rejected": -291.69696044921875, "loss": 0.3964, "rewards/accuracies": 0.75, "rewards/chosen": -0.07847069948911667, "rewards/margins": 1.0078822374343872, "rewards/rejected": -1.0863529443740845, "step": 4262 }, { "epoch": 0.49, "learning_rate": 1.5489874751258342e-07, "logits/chosen": -4.153163909912109, "logits/rejected": -3.8480470180511475, "logps/chosen": -333.59814453125, "logps/rejected": -252.2152099609375, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": 0.5049145817756653, "rewards/margins": 2.8392794132232666, "rewards/rejected": -2.334364891052246, "step": 4263 }, { "epoch": 0.49, "learning_rate": 1.5486363104295914e-07, "logits/chosen": -3.313730478286743, "logits/rejected": -3.125652313232422, "logps/chosen": -312.0753173828125, "logps/rejected": -302.1621398925781, "loss": 0.2836, "rewards/accuracies": 0.875, "rewards/chosen": 0.186031311750412, "rewards/margins": 2.134692668914795, "rewards/rejected": -1.9486613273620605, "step": 4264 }, { "epoch": 0.49, "learning_rate": 1.548285145733349e-07, "logits/chosen": -3.338886022567749, "logits/rejected": -3.135590076446533, "logps/chosen": -227.12118530273438, "logps/rejected": -209.9407958984375, "loss": 0.2044, "rewards/accuracies": 1.0, "rewards/chosen": 0.07148130238056183, "rewards/margins": 2.3658154010772705, "rewards/rejected": -2.2943339347839355, "step": 4265 }, { "epoch": 0.49, "learning_rate": 1.5479339810371065e-07, "logits/chosen": -3.3191421031951904, "logits/rejected": -3.0274291038513184, "logps/chosen": -284.0174255371094, "logps/rejected": -103.72421264648438, "loss": 0.7024, "rewards/accuracies": 0.5, "rewards/chosen": 0.06072060763835907, "rewards/margins": 0.7407540082931519, "rewards/rejected": -0.6800334453582764, "step": 4266 }, { "epoch": 0.49, "learning_rate": 1.5475828163408638e-07, "logits/chosen": -3.2885265350341797, "logits/rejected": -3.047283411026001, "logps/chosen": -310.8927917480469, "logps/rejected": -223.30227661132812, "loss": 0.6973, "rewards/accuracies": 0.75, "rewards/chosen": -0.5787513256072998, "rewards/margins": 0.13170669972896576, "rewards/rejected": -0.7104580998420715, "step": 4267 }, { "epoch": 0.49, "learning_rate": 1.5472316516446213e-07, "logits/chosen": -3.228416919708252, "logits/rejected": -3.5127434730529785, "logps/chosen": -190.70462036132812, "logps/rejected": -199.34390258789062, "loss": 0.3552, "rewards/accuracies": 0.75, "rewards/chosen": -0.11502022296190262, "rewards/margins": 2.401282548904419, "rewards/rejected": -2.5163025856018066, "step": 4268 }, { "epoch": 0.49, "learning_rate": 1.5468804869483786e-07, "logits/chosen": -2.7805447578430176, "logits/rejected": -2.699923038482666, "logps/chosen": -310.7448425292969, "logps/rejected": -249.8104248046875, "loss": 0.4198, "rewards/accuracies": 0.75, "rewards/chosen": 0.020518869161605835, "rewards/margins": 1.5944247245788574, "rewards/rejected": -1.5739057064056396, "step": 4269 }, { "epoch": 0.49, "learning_rate": 1.5465293222521361e-07, "logits/chosen": -2.729285955429077, "logits/rejected": -2.938908576965332, "logps/chosen": -197.95718383789062, "logps/rejected": -278.8391418457031, "loss": 0.2232, "rewards/accuracies": 1.0, "rewards/chosen": 0.23390258848667145, "rewards/margins": 3.3272204399108887, "rewards/rejected": -3.093317985534668, "step": 4270 }, { "epoch": 0.49, "learning_rate": 1.5461781575558937e-07, "logits/chosen": -2.3629744052886963, "logits/rejected": -2.420750141143799, "logps/chosen": -253.925537109375, "logps/rejected": -243.19322204589844, "loss": 0.339, "rewards/accuracies": 1.0, "rewards/chosen": 0.09295044839382172, "rewards/margins": 1.498010277748108, "rewards/rejected": -1.405059814453125, "step": 4271 }, { "epoch": 0.49, "learning_rate": 1.545826992859651e-07, "logits/chosen": -2.986513137817383, "logits/rejected": -2.91434383392334, "logps/chosen": -244.45603942871094, "logps/rejected": -154.60813903808594, "loss": 0.4283, "rewards/accuracies": 0.875, "rewards/chosen": 0.36696621775627136, "rewards/margins": 1.496220588684082, "rewards/rejected": -1.1292543411254883, "step": 4272 }, { "epoch": 0.49, "learning_rate": 1.5454758281634085e-07, "logits/chosen": -3.0584897994995117, "logits/rejected": -3.1681861877441406, "logps/chosen": -297.70306396484375, "logps/rejected": -277.62982177734375, "loss": 0.3067, "rewards/accuracies": 0.875, "rewards/chosen": -0.26723724603652954, "rewards/margins": 2.0633718967437744, "rewards/rejected": -2.3306093215942383, "step": 4273 }, { "epoch": 0.49, "learning_rate": 1.5451246634671663e-07, "logits/chosen": -2.5133297443389893, "logits/rejected": -2.6567559242248535, "logps/chosen": -272.3305358886719, "logps/rejected": -176.26513671875, "loss": 0.4541, "rewards/accuracies": 0.625, "rewards/chosen": 0.3723936676979065, "rewards/margins": 1.9132310152053833, "rewards/rejected": -1.5408375263214111, "step": 4274 }, { "epoch": 0.49, "learning_rate": 1.5447734987709236e-07, "logits/chosen": -3.7197265625, "logits/rejected": -3.4434762001037598, "logps/chosen": -340.06939697265625, "logps/rejected": -338.12518310546875, "loss": 0.7822, "rewards/accuracies": 0.625, "rewards/chosen": -0.37703490257263184, "rewards/margins": 1.6792926788330078, "rewards/rejected": -2.0563273429870605, "step": 4275 }, { "epoch": 0.49, "learning_rate": 1.544422334074681e-07, "logits/chosen": -3.471404552459717, "logits/rejected": -3.4031448364257812, "logps/chosen": -211.17233276367188, "logps/rejected": -255.6654052734375, "loss": 0.2198, "rewards/accuracies": 0.875, "rewards/chosen": 0.3411221206188202, "rewards/margins": 2.6552062034606934, "rewards/rejected": -2.314084053039551, "step": 4276 }, { "epoch": 0.49, "learning_rate": 1.5440711693784384e-07, "logits/chosen": -4.029463768005371, "logits/rejected": -3.7630763053894043, "logps/chosen": -203.9584197998047, "logps/rejected": -164.60647583007812, "loss": 0.7409, "rewards/accuracies": 0.875, "rewards/chosen": -0.31074589490890503, "rewards/margins": 1.895453691482544, "rewards/rejected": -2.2061996459960938, "step": 4277 }, { "epoch": 0.49, "learning_rate": 1.543720004682196e-07, "logits/chosen": -3.709306478500366, "logits/rejected": -3.6453418731689453, "logps/chosen": -163.29310607910156, "logps/rejected": -247.66061401367188, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": -0.14385786652565002, "rewards/margins": 1.9157966375350952, "rewards/rejected": -2.059654712677002, "step": 4278 }, { "epoch": 0.49, "learning_rate": 1.5433688399859535e-07, "logits/chosen": -2.556473970413208, "logits/rejected": -2.5628554821014404, "logps/chosen": -311.23046875, "logps/rejected": -297.63092041015625, "loss": 0.2219, "rewards/accuracies": 0.875, "rewards/chosen": -0.24030542373657227, "rewards/margins": 2.8378944396972656, "rewards/rejected": -3.078199863433838, "step": 4279 }, { "epoch": 0.49, "learning_rate": 1.5430176752897107e-07, "logits/chosen": -3.118302345275879, "logits/rejected": -3.322199821472168, "logps/chosen": -428.4708251953125, "logps/rejected": -363.459716796875, "loss": 0.3394, "rewards/accuracies": 0.875, "rewards/chosen": 0.08563782274723053, "rewards/margins": 2.6950995922088623, "rewards/rejected": -2.609461784362793, "step": 4280 }, { "epoch": 0.49, "learning_rate": 1.5426665105934683e-07, "logits/chosen": -2.991495370864868, "logits/rejected": -3.2710366249084473, "logps/chosen": -187.23611450195312, "logps/rejected": -214.78778076171875, "loss": 0.3038, "rewards/accuracies": 0.875, "rewards/chosen": -0.23089809715747833, "rewards/margins": 1.5562303066253662, "rewards/rejected": -1.7871284484863281, "step": 4281 }, { "epoch": 0.49, "learning_rate": 1.5423153458972258e-07, "logits/chosen": -3.504746913909912, "logits/rejected": -3.508291006088257, "logps/chosen": -140.92364501953125, "logps/rejected": -181.0018310546875, "loss": 0.3544, "rewards/accuracies": 0.75, "rewards/chosen": 0.08114970475435257, "rewards/margins": 1.6918983459472656, "rewards/rejected": -1.6107486486434937, "step": 4282 }, { "epoch": 0.49, "learning_rate": 1.541964181200983e-07, "logits/chosen": -2.5548577308654785, "logits/rejected": -2.3360445499420166, "logps/chosen": -436.4823913574219, "logps/rejected": -294.167724609375, "loss": 0.2517, "rewards/accuracies": 0.875, "rewards/chosen": 0.707363486289978, "rewards/margins": 1.8429533243179321, "rewards/rejected": -1.135589838027954, "step": 4283 }, { "epoch": 0.49, "learning_rate": 1.5416130165047406e-07, "logits/chosen": -3.411285877227783, "logits/rejected": -3.415295124053955, "logps/chosen": -207.49159240722656, "logps/rejected": -346.6177673339844, "loss": 0.6683, "rewards/accuracies": 0.625, "rewards/chosen": -0.4316660165786743, "rewards/margins": 1.3793939352035522, "rewards/rejected": -1.811059832572937, "step": 4284 }, { "epoch": 0.49, "learning_rate": 1.541261851808498e-07, "logits/chosen": -3.730743646621704, "logits/rejected": -3.716920852661133, "logps/chosen": -370.18170166015625, "logps/rejected": -274.3343200683594, "loss": 0.2513, "rewards/accuracies": 1.0, "rewards/chosen": -0.4774865508079529, "rewards/margins": 2.0788557529449463, "rewards/rejected": -2.556342124938965, "step": 4285 }, { "epoch": 0.49, "learning_rate": 1.5409106871122557e-07, "logits/chosen": -3.2651567459106445, "logits/rejected": -3.290393829345703, "logps/chosen": -223.17340087890625, "logps/rejected": -306.7315979003906, "loss": 0.3742, "rewards/accuracies": 0.75, "rewards/chosen": -0.016801893711090088, "rewards/margins": 1.3391062021255493, "rewards/rejected": -1.3559080362319946, "step": 4286 }, { "epoch": 0.49, "learning_rate": 1.5405595224160132e-07, "logits/chosen": -2.5970003604888916, "logits/rejected": -2.786433458328247, "logps/chosen": -164.9182586669922, "logps/rejected": -221.38204956054688, "loss": 0.4937, "rewards/accuracies": 0.625, "rewards/chosen": -0.24331718683242798, "rewards/margins": 1.5659312009811401, "rewards/rejected": -1.8092483282089233, "step": 4287 }, { "epoch": 0.49, "learning_rate": 1.5402083577197705e-07, "logits/chosen": -2.9223663806915283, "logits/rejected": -3.1509416103363037, "logps/chosen": -227.314208984375, "logps/rejected": -195.67503356933594, "loss": 0.2758, "rewards/accuracies": 0.875, "rewards/chosen": 0.0181247740983963, "rewards/margins": 3.06099796295166, "rewards/rejected": -3.0428733825683594, "step": 4288 }, { "epoch": 0.49, "learning_rate": 1.539857193023528e-07, "logits/chosen": -2.556455612182617, "logits/rejected": -2.560861587524414, "logps/chosen": -344.978271484375, "logps/rejected": -364.9228515625, "loss": 0.1787, "rewards/accuracies": 0.875, "rewards/chosen": 0.15989960730075836, "rewards/margins": 2.260153293609619, "rewards/rejected": -2.1002538204193115, "step": 4289 }, { "epoch": 0.49, "learning_rate": 1.5395060283272853e-07, "logits/chosen": -3.4339513778686523, "logits/rejected": -3.381817579269409, "logps/chosen": -194.7606201171875, "logps/rejected": -184.34353637695312, "loss": 0.388, "rewards/accuracies": 0.75, "rewards/chosen": -0.3357822299003601, "rewards/margins": 1.3374630212783813, "rewards/rejected": -1.6732453107833862, "step": 4290 }, { "epoch": 0.49, "learning_rate": 1.5391548636310429e-07, "logits/chosen": -3.5005691051483154, "logits/rejected": -3.5526740550994873, "logps/chosen": -449.18450927734375, "logps/rejected": -315.04766845703125, "loss": 0.1748, "rewards/accuracies": 1.0, "rewards/chosen": 0.6397150754928589, "rewards/margins": 2.375354766845703, "rewards/rejected": -1.7356396913528442, "step": 4291 }, { "epoch": 0.49, "learning_rate": 1.5388036989348004e-07, "logits/chosen": -3.597813606262207, "logits/rejected": -3.6424758434295654, "logps/chosen": -327.48828125, "logps/rejected": -206.46420288085938, "loss": 0.4587, "rewards/accuracies": 0.875, "rewards/chosen": 0.035460442304611206, "rewards/margins": 1.8111014366149902, "rewards/rejected": -1.7756409645080566, "step": 4292 }, { "epoch": 0.49, "learning_rate": 1.5384525342385577e-07, "logits/chosen": -2.477278709411621, "logits/rejected": -2.5465385913848877, "logps/chosen": -347.1973876953125, "logps/rejected": -219.20623779296875, "loss": 0.3007, "rewards/accuracies": 0.75, "rewards/chosen": 0.2723860442638397, "rewards/margins": 2.3205997943878174, "rewards/rejected": -2.0482139587402344, "step": 4293 }, { "epoch": 0.5, "learning_rate": 1.5381013695423152e-07, "logits/chosen": -3.118471622467041, "logits/rejected": -3.2404327392578125, "logps/chosen": -240.72564697265625, "logps/rejected": -210.66355895996094, "loss": 0.5995, "rewards/accuracies": 0.625, "rewards/chosen": -0.5552998781204224, "rewards/margins": 1.0253130197525024, "rewards/rejected": -1.5806130170822144, "step": 4294 }, { "epoch": 0.5, "learning_rate": 1.5377502048460727e-07, "logits/chosen": -2.632335662841797, "logits/rejected": -2.719329833984375, "logps/chosen": -306.66845703125, "logps/rejected": -174.6659698486328, "loss": 0.3442, "rewards/accuracies": 0.875, "rewards/chosen": 0.07939299196004868, "rewards/margins": 1.0492253303527832, "rewards/rejected": -0.9698323607444763, "step": 4295 }, { "epoch": 0.5, "learning_rate": 1.53739904014983e-07, "logits/chosen": -3.3876118659973145, "logits/rejected": -3.1232876777648926, "logps/chosen": -147.7434539794922, "logps/rejected": -283.1214599609375, "loss": 0.4113, "rewards/accuracies": 0.75, "rewards/chosen": -0.23189568519592285, "rewards/margins": 1.7073016166687012, "rewards/rejected": -1.9391974210739136, "step": 4296 }, { "epoch": 0.5, "learning_rate": 1.5370478754535878e-07, "logits/chosen": -2.832066535949707, "logits/rejected": -2.8992743492126465, "logps/chosen": -345.2873229980469, "logps/rejected": -313.4912109375, "loss": 0.3301, "rewards/accuracies": 0.875, "rewards/chosen": 0.4334893524646759, "rewards/margins": 3.3997535705566406, "rewards/rejected": -2.966264247894287, "step": 4297 }, { "epoch": 0.5, "learning_rate": 1.536696710757345e-07, "logits/chosen": -3.63503360748291, "logits/rejected": -3.5896596908569336, "logps/chosen": -240.21676635742188, "logps/rejected": -371.07830810546875, "loss": 0.136, "rewards/accuracies": 1.0, "rewards/chosen": 0.08506511151790619, "rewards/margins": 2.8511862754821777, "rewards/rejected": -2.7661213874816895, "step": 4298 }, { "epoch": 0.5, "learning_rate": 1.5363455460611026e-07, "logits/chosen": -2.9974560737609863, "logits/rejected": -2.824087142944336, "logps/chosen": -486.75323486328125, "logps/rejected": -281.33984375, "loss": 0.2544, "rewards/accuracies": 0.875, "rewards/chosen": 0.18109309673309326, "rewards/margins": 1.912896752357483, "rewards/rejected": -1.7318035364151, "step": 4299 }, { "epoch": 0.5, "learning_rate": 1.5359943813648602e-07, "logits/chosen": -3.6033737659454346, "logits/rejected": -3.24165678024292, "logps/chosen": -298.1138916015625, "logps/rejected": -315.6715087890625, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": -0.1615678071975708, "rewards/margins": 1.861816167831421, "rewards/rejected": -2.0233840942382812, "step": 4300 }, { "epoch": 0.5, "learning_rate": 1.5356432166686174e-07, "logits/chosen": -3.476006031036377, "logits/rejected": -3.3934459686279297, "logps/chosen": -332.7486572265625, "logps/rejected": -355.778564453125, "loss": 0.1822, "rewards/accuracies": 1.0, "rewards/chosen": 0.1129886731505394, "rewards/margins": 2.5368080139160156, "rewards/rejected": -2.4238193035125732, "step": 4301 }, { "epoch": 0.5, "learning_rate": 1.535292051972375e-07, "logits/chosen": -3.1930768489837646, "logits/rejected": -2.8808445930480957, "logps/chosen": -356.93890380859375, "logps/rejected": -379.7453308105469, "loss": 0.2241, "rewards/accuracies": 1.0, "rewards/chosen": 0.3100447952747345, "rewards/margins": 2.159363031387329, "rewards/rejected": -1.849318265914917, "step": 4302 }, { "epoch": 0.5, "learning_rate": 1.5349408872761325e-07, "logits/chosen": -3.519726276397705, "logits/rejected": -3.567011833190918, "logps/chosen": -223.61976623535156, "logps/rejected": -180.75791931152344, "loss": 0.9607, "rewards/accuracies": 0.5, "rewards/chosen": -1.0219066143035889, "rewards/margins": 0.00025978684425354004, "rewards/rejected": -1.0221664905548096, "step": 4303 }, { "epoch": 0.5, "learning_rate": 1.5345897225798898e-07, "logits/chosen": -2.596865177154541, "logits/rejected": -2.8274571895599365, "logps/chosen": -218.16639709472656, "logps/rejected": -275.51434326171875, "loss": 0.3386, "rewards/accuracies": 0.875, "rewards/chosen": -0.1814868152141571, "rewards/margins": 2.738020896911621, "rewards/rejected": -2.9195075035095215, "step": 4304 }, { "epoch": 0.5, "learning_rate": 1.5342385578836473e-07, "logits/chosen": -3.5933730602264404, "logits/rejected": -3.670039653778076, "logps/chosen": -141.3319091796875, "logps/rejected": -134.354248046875, "loss": 0.3227, "rewards/accuracies": 1.0, "rewards/chosen": 0.26410186290740967, "rewards/margins": 1.3488707542419434, "rewards/rejected": -1.0847687721252441, "step": 4305 }, { "epoch": 0.5, "learning_rate": 1.5338873931874046e-07, "logits/chosen": -3.088113784790039, "logits/rejected": -3.3945322036743164, "logps/chosen": -308.6744079589844, "logps/rejected": -274.1036376953125, "loss": 0.2753, "rewards/accuracies": 0.875, "rewards/chosen": 0.05853720009326935, "rewards/margins": 1.9904723167419434, "rewards/rejected": -1.9319349527359009, "step": 4306 }, { "epoch": 0.5, "learning_rate": 1.5335362284911621e-07, "logits/chosen": -2.7588510513305664, "logits/rejected": -2.8285720348358154, "logps/chosen": -318.97021484375, "logps/rejected": -283.6190490722656, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": -0.08488290011882782, "rewards/margins": 1.5864958763122559, "rewards/rejected": -1.6713788509368896, "step": 4307 }, { "epoch": 0.5, "learning_rate": 1.53318506379492e-07, "logits/chosen": -2.623995065689087, "logits/rejected": -2.3765671253204346, "logps/chosen": -147.02392578125, "logps/rejected": -204.38284301757812, "loss": 0.635, "rewards/accuracies": 0.75, "rewards/chosen": -0.36885184049606323, "rewards/margins": 1.0702910423278809, "rewards/rejected": -1.4391427040100098, "step": 4308 }, { "epoch": 0.5, "learning_rate": 1.5328338990986772e-07, "logits/chosen": -3.4304025173187256, "logits/rejected": -2.8442471027374268, "logps/chosen": -546.1985473632812, "logps/rejected": -297.4148864746094, "loss": 0.4996, "rewards/accuracies": 0.75, "rewards/chosen": -0.05596911907196045, "rewards/margins": 1.4554903507232666, "rewards/rejected": -1.5114593505859375, "step": 4309 }, { "epoch": 0.5, "learning_rate": 1.5324827344024348e-07, "logits/chosen": -3.1929404735565186, "logits/rejected": -3.457733631134033, "logps/chosen": -171.1824951171875, "logps/rejected": -212.07928466796875, "loss": 0.5239, "rewards/accuracies": 0.875, "rewards/chosen": -0.5457698106765747, "rewards/margins": 1.8894262313842773, "rewards/rejected": -2.4351961612701416, "step": 4310 }, { "epoch": 0.5, "learning_rate": 1.5321315697061923e-07, "logits/chosen": -3.421989917755127, "logits/rejected": -3.1232411861419678, "logps/chosen": -157.9214324951172, "logps/rejected": -227.15835571289062, "loss": 0.2859, "rewards/accuracies": 0.875, "rewards/chosen": 0.05309556424617767, "rewards/margins": 1.7201213836669922, "rewards/rejected": -1.6670256853103638, "step": 4311 }, { "epoch": 0.5, "learning_rate": 1.5317804050099496e-07, "logits/chosen": -3.571376323699951, "logits/rejected": -3.7931506633758545, "logps/chosen": -215.34329223632812, "logps/rejected": -249.10101318359375, "loss": 0.2702, "rewards/accuracies": 0.875, "rewards/chosen": -0.007148772478103638, "rewards/margins": 2.550438165664673, "rewards/rejected": -2.557586908340454, "step": 4312 }, { "epoch": 0.5, "learning_rate": 1.531429240313707e-07, "logits/chosen": -2.622481346130371, "logits/rejected": -3.104994773864746, "logps/chosen": -332.6460266113281, "logps/rejected": -287.17071533203125, "loss": 0.3391, "rewards/accuracies": 0.875, "rewards/chosen": 0.38218504190444946, "rewards/margins": 3.495710849761963, "rewards/rejected": -3.113525867462158, "step": 4313 }, { "epoch": 0.5, "learning_rate": 1.5310780756174644e-07, "logits/chosen": -2.9433114528656006, "logits/rejected": -3.0924386978149414, "logps/chosen": -253.6914825439453, "logps/rejected": -263.57550048828125, "loss": 0.6465, "rewards/accuracies": 0.625, "rewards/chosen": -0.5032229423522949, "rewards/margins": 1.1186459064483643, "rewards/rejected": -1.6218688488006592, "step": 4314 }, { "epoch": 0.5, "learning_rate": 1.530726910921222e-07, "logits/chosen": -3.1789400577545166, "logits/rejected": -3.2611005306243896, "logps/chosen": -283.39642333984375, "logps/rejected": -310.5025634765625, "loss": 0.723, "rewards/accuracies": 0.5, "rewards/chosen": -0.042109109461307526, "rewards/margins": 1.278292179107666, "rewards/rejected": -1.3204011917114258, "step": 4315 }, { "epoch": 0.5, "learning_rate": 1.5303757462249795e-07, "logits/chosen": -3.6112489700317383, "logits/rejected": -3.9560658931732178, "logps/chosen": -143.69947814941406, "logps/rejected": -203.34048461914062, "loss": 0.1541, "rewards/accuracies": 1.0, "rewards/chosen": 0.30643394589424133, "rewards/margins": 2.8478317260742188, "rewards/rejected": -2.541398048400879, "step": 4316 }, { "epoch": 0.5, "learning_rate": 1.5300245815287367e-07, "logits/chosen": -3.8152012825012207, "logits/rejected": -3.7606964111328125, "logps/chosen": -184.01206970214844, "logps/rejected": -283.0292053222656, "loss": 0.4219, "rewards/accuracies": 0.75, "rewards/chosen": -0.1609235256910324, "rewards/margins": 1.2734662294387817, "rewards/rejected": -1.4343898296356201, "step": 4317 }, { "epoch": 0.5, "learning_rate": 1.5296734168324943e-07, "logits/chosen": -2.952589988708496, "logits/rejected": -3.237651824951172, "logps/chosen": -145.03277587890625, "logps/rejected": -275.06793212890625, "loss": 0.4529, "rewards/accuracies": 0.875, "rewards/chosen": 0.17373663187026978, "rewards/margins": 1.7595665454864502, "rewards/rejected": -1.5858299732208252, "step": 4318 }, { "epoch": 0.5, "learning_rate": 1.529322252136252e-07, "logits/chosen": -3.3865129947662354, "logits/rejected": -2.983956813812256, "logps/chosen": -267.1505126953125, "logps/rejected": -288.906494140625, "loss": 0.2509, "rewards/accuracies": 0.875, "rewards/chosen": -0.41714102029800415, "rewards/margins": 1.9098066091537476, "rewards/rejected": -2.3269476890563965, "step": 4319 }, { "epoch": 0.5, "learning_rate": 1.5289710874400094e-07, "logits/chosen": -2.7838242053985596, "logits/rejected": -2.710378646850586, "logps/chosen": -312.75457763671875, "logps/rejected": -226.76536560058594, "loss": 0.3814, "rewards/accuracies": 0.75, "rewards/chosen": -0.38128241896629333, "rewards/margins": 1.5275380611419678, "rewards/rejected": -1.908820629119873, "step": 4320 }, { "epoch": 0.5, "learning_rate": 1.528619922743767e-07, "logits/chosen": -3.054166793823242, "logits/rejected": -3.326118230819702, "logps/chosen": -221.9783935546875, "logps/rejected": -264.2236328125, "loss": 0.2025, "rewards/accuracies": 0.875, "rewards/chosen": 0.2801345884799957, "rewards/margins": 2.543579339981079, "rewards/rejected": -2.2634449005126953, "step": 4321 }, { "epoch": 0.5, "learning_rate": 1.5282687580475242e-07, "logits/chosen": -3.012678623199463, "logits/rejected": -2.8826327323913574, "logps/chosen": -158.38796997070312, "logps/rejected": -128.5760040283203, "loss": 0.4726, "rewards/accuracies": 0.75, "rewards/chosen": -0.17204123735427856, "rewards/margins": 0.9547150135040283, "rewards/rejected": -1.126756191253662, "step": 4322 }, { "epoch": 0.5, "learning_rate": 1.5279175933512817e-07, "logits/chosen": -2.7960379123687744, "logits/rejected": -2.6179189682006836, "logps/chosen": -263.7035827636719, "logps/rejected": -166.1133270263672, "loss": 0.4477, "rewards/accuracies": 0.75, "rewards/chosen": -0.14601784944534302, "rewards/margins": 1.0945788621902466, "rewards/rejected": -1.2405967712402344, "step": 4323 }, { "epoch": 0.5, "learning_rate": 1.5275664286550392e-07, "logits/chosen": -3.5331737995147705, "logits/rejected": -3.1034798622131348, "logps/chosen": -258.3570251464844, "logps/rejected": -240.05247497558594, "loss": 0.4052, "rewards/accuracies": 0.75, "rewards/chosen": 0.11056377738714218, "rewards/margins": 1.030532956123352, "rewards/rejected": -0.9199692010879517, "step": 4324 }, { "epoch": 0.5, "learning_rate": 1.5272152639587965e-07, "logits/chosen": -3.2509894371032715, "logits/rejected": -3.2348217964172363, "logps/chosen": -153.45252990722656, "logps/rejected": -186.507568359375, "loss": 0.3041, "rewards/accuracies": 0.75, "rewards/chosen": -0.18447771668434143, "rewards/margins": 1.9643664360046387, "rewards/rejected": -2.1488442420959473, "step": 4325 }, { "epoch": 0.5, "learning_rate": 1.526864099262554e-07, "logits/chosen": -3.0422749519348145, "logits/rejected": -2.887605667114258, "logps/chosen": -350.14227294921875, "logps/rejected": -392.3241882324219, "loss": 0.2988, "rewards/accuracies": 1.0, "rewards/chosen": 0.4190361499786377, "rewards/margins": 1.4657340049743652, "rewards/rejected": -1.046697735786438, "step": 4326 }, { "epoch": 0.5, "learning_rate": 1.5265129345663116e-07, "logits/chosen": -3.6773428916931152, "logits/rejected": -3.4764792919158936, "logps/chosen": -216.73037719726562, "logps/rejected": -243.27769470214844, "loss": 0.7284, "rewards/accuracies": 0.75, "rewards/chosen": -0.2985934615135193, "rewards/margins": 0.9518216848373413, "rewards/rejected": -1.2504152059555054, "step": 4327 }, { "epoch": 0.5, "learning_rate": 1.526161769870069e-07, "logits/chosen": -2.914721965789795, "logits/rejected": -2.5376343727111816, "logps/chosen": -276.6370544433594, "logps/rejected": -336.686279296875, "loss": 0.465, "rewards/accuracies": 0.75, "rewards/chosen": -0.09736742824316025, "rewards/margins": 1.4225717782974243, "rewards/rejected": -1.5199393033981323, "step": 4328 }, { "epoch": 0.5, "learning_rate": 1.5258106051738264e-07, "logits/chosen": -3.0304510593414307, "logits/rejected": -2.9962449073791504, "logps/chosen": -171.1322479248047, "logps/rejected": -238.05624389648438, "loss": 0.2156, "rewards/accuracies": 1.0, "rewards/chosen": 0.010245583951473236, "rewards/margins": 3.0334830284118652, "rewards/rejected": -3.023237466812134, "step": 4329 }, { "epoch": 0.5, "learning_rate": 1.5254594404775837e-07, "logits/chosen": -2.7044529914855957, "logits/rejected": -2.591447591781616, "logps/chosen": -297.8039245605469, "logps/rejected": -254.88629150390625, "loss": 0.5128, "rewards/accuracies": 0.625, "rewards/chosen": -0.2347683161497116, "rewards/margins": 1.2874822616577148, "rewards/rejected": -1.5222506523132324, "step": 4330 }, { "epoch": 0.5, "learning_rate": 1.5251082757813415e-07, "logits/chosen": -3.270585060119629, "logits/rejected": -3.217857599258423, "logps/chosen": -245.9214324951172, "logps/rejected": -277.43768310546875, "loss": 0.328, "rewards/accuracies": 0.875, "rewards/chosen": 0.22295339405536652, "rewards/margins": 1.4489483833312988, "rewards/rejected": -1.2259950637817383, "step": 4331 }, { "epoch": 0.5, "learning_rate": 1.524757111085099e-07, "logits/chosen": -2.6677193641662598, "logits/rejected": -2.567304849624634, "logps/chosen": -245.222412109375, "logps/rejected": -300.80816650390625, "loss": 0.5077, "rewards/accuracies": 0.75, "rewards/chosen": -0.2571207284927368, "rewards/margins": 1.7925922870635986, "rewards/rejected": -2.049712896347046, "step": 4332 }, { "epoch": 0.5, "learning_rate": 1.5244059463888563e-07, "logits/chosen": -2.9411396980285645, "logits/rejected": -3.1203718185424805, "logps/chosen": -243.03067016601562, "logps/rejected": -242.32382202148438, "loss": 0.5715, "rewards/accuracies": 0.75, "rewards/chosen": -0.36854732036590576, "rewards/margins": 1.3076350688934326, "rewards/rejected": -1.6761822700500488, "step": 4333 }, { "epoch": 0.5, "learning_rate": 1.5240547816926138e-07, "logits/chosen": -3.131685495376587, "logits/rejected": -3.1436080932617188, "logps/chosen": -210.40829467773438, "logps/rejected": -194.9435577392578, "loss": 0.3698, "rewards/accuracies": 0.75, "rewards/chosen": 0.26902303099632263, "rewards/margins": 1.715477466583252, "rewards/rejected": -1.446454405784607, "step": 4334 }, { "epoch": 0.5, "learning_rate": 1.523703616996371e-07, "logits/chosen": -2.9350483417510986, "logits/rejected": -3.36898136138916, "logps/chosen": -350.0063171386719, "logps/rejected": -315.55999755859375, "loss": 0.4473, "rewards/accuracies": 0.875, "rewards/chosen": 0.021467216312885284, "rewards/margins": 1.919000506401062, "rewards/rejected": -1.8975332975387573, "step": 4335 }, { "epoch": 0.5, "learning_rate": 1.5233524523001286e-07, "logits/chosen": -3.468585968017578, "logits/rejected": -3.4708735942840576, "logps/chosen": -259.3088073730469, "logps/rejected": -275.88983154296875, "loss": 0.2177, "rewards/accuracies": 0.875, "rewards/chosen": 0.21335579454898834, "rewards/margins": 3.158236503601074, "rewards/rejected": -2.944880723953247, "step": 4336 }, { "epoch": 0.5, "learning_rate": 1.5230012876038862e-07, "logits/chosen": -3.5894057750701904, "logits/rejected": -4.07175350189209, "logps/chosen": -258.2807312011719, "logps/rejected": -377.88433837890625, "loss": 0.1754, "rewards/accuracies": 0.875, "rewards/chosen": -0.020924709737300873, "rewards/margins": 2.864034414291382, "rewards/rejected": -2.8849592208862305, "step": 4337 }, { "epoch": 0.5, "learning_rate": 1.5226501229076435e-07, "logits/chosen": -2.7482848167419434, "logits/rejected": -2.8134498596191406, "logps/chosen": -220.245849609375, "logps/rejected": -244.49044799804688, "loss": 0.5872, "rewards/accuracies": 0.625, "rewards/chosen": -0.9350711703300476, "rewards/margins": 0.6833840608596802, "rewards/rejected": -1.618455171585083, "step": 4338 }, { "epoch": 0.5, "learning_rate": 1.522298958211401e-07, "logits/chosen": -3.1900813579559326, "logits/rejected": -2.983151912689209, "logps/chosen": -287.6822204589844, "logps/rejected": -232.12005615234375, "loss": 0.4724, "rewards/accuracies": 0.875, "rewards/chosen": -0.595203161239624, "rewards/margins": 1.861502766609192, "rewards/rejected": -2.4567060470581055, "step": 4339 }, { "epoch": 0.5, "learning_rate": 1.5219477935151588e-07, "logits/chosen": -3.405463933944702, "logits/rejected": -3.8356313705444336, "logps/chosen": -241.0276641845703, "logps/rejected": -344.3280334472656, "loss": 0.2981, "rewards/accuracies": 0.875, "rewards/chosen": 0.740489661693573, "rewards/margins": 2.790050983428955, "rewards/rejected": -2.0495612621307373, "step": 4340 }, { "epoch": 0.5, "learning_rate": 1.5215966288189158e-07, "logits/chosen": -2.251642942428589, "logits/rejected": -2.215527296066284, "logps/chosen": -241.87911987304688, "logps/rejected": -272.67401123046875, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": -0.14238114655017853, "rewards/margins": 2.0987648963928223, "rewards/rejected": -2.2411460876464844, "step": 4341 }, { "epoch": 0.5, "learning_rate": 1.5212454641226736e-07, "logits/chosen": -2.880380392074585, "logits/rejected": -3.083404064178467, "logps/chosen": -285.37188720703125, "logps/rejected": -216.2775115966797, "loss": 0.4331, "rewards/accuracies": 0.75, "rewards/chosen": -0.20240969955921173, "rewards/margins": 1.6976351737976074, "rewards/rejected": -1.9000449180603027, "step": 4342 }, { "epoch": 0.5, "learning_rate": 1.520894299426431e-07, "logits/chosen": -2.954178810119629, "logits/rejected": -2.8907527923583984, "logps/chosen": -460.04095458984375, "logps/rejected": -347.473388671875, "loss": 0.8078, "rewards/accuracies": 0.625, "rewards/chosen": -1.3514825105667114, "rewards/margins": 0.2154478132724762, "rewards/rejected": -1.5669302940368652, "step": 4343 }, { "epoch": 0.5, "learning_rate": 1.5205431347301884e-07, "logits/chosen": -2.8842124938964844, "logits/rejected": -2.9241912364959717, "logps/chosen": -422.3749694824219, "logps/rejected": -245.2810516357422, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": -0.8633109331130981, "rewards/margins": 0.9062386751174927, "rewards/rejected": -1.7695496082305908, "step": 4344 }, { "epoch": 0.5, "learning_rate": 1.520191970033946e-07, "logits/chosen": -3.1952526569366455, "logits/rejected": -3.0497982501983643, "logps/chosen": -247.4343719482422, "logps/rejected": -217.57260131835938, "loss": 0.4565, "rewards/accuracies": 0.875, "rewards/chosen": -0.8429983854293823, "rewards/margins": 1.032486915588379, "rewards/rejected": -1.8754854202270508, "step": 4345 }, { "epoch": 0.5, "learning_rate": 1.5198408053377032e-07, "logits/chosen": -3.1732306480407715, "logits/rejected": -2.5270819664001465, "logps/chosen": -187.05599975585938, "logps/rejected": -195.52894592285156, "loss": 0.9775, "rewards/accuracies": 0.375, "rewards/chosen": -0.39661121368408203, "rewards/margins": -0.39676156640052795, "rewards/rejected": 0.00015035271644592285, "step": 4346 }, { "epoch": 0.5, "learning_rate": 1.5194896406414608e-07, "logits/chosen": -2.3318662643432617, "logits/rejected": -2.538515329360962, "logps/chosen": -413.87493896484375, "logps/rejected": -386.77423095703125, "loss": 0.3905, "rewards/accuracies": 0.75, "rewards/chosen": -0.218570739030838, "rewards/margins": 1.9617739915847778, "rewards/rejected": -2.180344581604004, "step": 4347 }, { "epoch": 0.5, "learning_rate": 1.5191384759452183e-07, "logits/chosen": -3.0957298278808594, "logits/rejected": -3.1337296962738037, "logps/chosen": -204.4673309326172, "logps/rejected": -226.9080810546875, "loss": 0.5102, "rewards/accuracies": 0.875, "rewards/chosen": -0.5207543969154358, "rewards/margins": 1.1098259687423706, "rewards/rejected": -1.6305804252624512, "step": 4348 }, { "epoch": 0.5, "learning_rate": 1.5187873112489756e-07, "logits/chosen": -3.4197230339050293, "logits/rejected": -3.3105177879333496, "logps/chosen": -175.8695068359375, "logps/rejected": -191.9119110107422, "loss": 0.4738, "rewards/accuracies": 0.75, "rewards/chosen": -0.4821392297744751, "rewards/margins": 1.9957146644592285, "rewards/rejected": -2.477854013442993, "step": 4349 }, { "epoch": 0.5, "learning_rate": 1.518436146552733e-07, "logits/chosen": -3.420919895172119, "logits/rejected": -3.5084307193756104, "logps/chosen": -402.57720947265625, "logps/rejected": -259.1300048828125, "loss": 0.2908, "rewards/accuracies": 0.875, "rewards/chosen": -0.11197924613952637, "rewards/margins": 2.2586443424224854, "rewards/rejected": -2.3706235885620117, "step": 4350 }, { "epoch": 0.5, "learning_rate": 1.5180849818564904e-07, "logits/chosen": -2.614835023880005, "logits/rejected": -2.646704912185669, "logps/chosen": -364.3536376953125, "logps/rejected": -387.14105224609375, "loss": 0.3531, "rewards/accuracies": 0.875, "rewards/chosen": 0.031184419989585876, "rewards/margins": 1.9985307455062866, "rewards/rejected": -1.9673463106155396, "step": 4351 }, { "epoch": 0.5, "learning_rate": 1.517733817160248e-07, "logits/chosen": -3.0071463584899902, "logits/rejected": -2.820805072784424, "logps/chosen": -487.60791015625, "logps/rejected": -267.1512451171875, "loss": 0.4815, "rewards/accuracies": 0.75, "rewards/chosen": -0.1285768747329712, "rewards/margins": 1.3028665781021118, "rewards/rejected": -1.431443452835083, "step": 4352 }, { "epoch": 0.5, "learning_rate": 1.5173826524640057e-07, "logits/chosen": -2.438716173171997, "logits/rejected": -2.2845852375030518, "logps/chosen": -333.16571044921875, "logps/rejected": -378.7651672363281, "loss": 0.4016, "rewards/accuracies": 0.625, "rewards/chosen": 0.021643638610839844, "rewards/margins": 2.5265846252441406, "rewards/rejected": -2.504940986633301, "step": 4353 }, { "epoch": 0.5, "learning_rate": 1.517031487767763e-07, "logits/chosen": -2.881808280944824, "logits/rejected": -2.944437026977539, "logps/chosen": -266.9672546386719, "logps/rejected": -256.4344177246094, "loss": 0.178, "rewards/accuracies": 1.0, "rewards/chosen": 0.4486692547798157, "rewards/margins": 2.8912971019744873, "rewards/rejected": -2.4426279067993164, "step": 4354 }, { "epoch": 0.5, "learning_rate": 1.5166803230715206e-07, "logits/chosen": -2.6030170917510986, "logits/rejected": -2.6742677688598633, "logps/chosen": -344.979736328125, "logps/rejected": -435.9056701660156, "loss": 0.4569, "rewards/accuracies": 0.875, "rewards/chosen": 0.1490846574306488, "rewards/margins": 1.406320571899414, "rewards/rejected": -1.2572360038757324, "step": 4355 }, { "epoch": 0.5, "learning_rate": 1.516329158375278e-07, "logits/chosen": -3.1855664253234863, "logits/rejected": -2.9454476833343506, "logps/chosen": -236.99270629882812, "logps/rejected": -241.7470245361328, "loss": 0.2038, "rewards/accuracies": 1.0, "rewards/chosen": 0.23349729180335999, "rewards/margins": 1.9345985651016235, "rewards/rejected": -1.7011014223098755, "step": 4356 }, { "epoch": 0.5, "learning_rate": 1.5159779936790354e-07, "logits/chosen": -3.0357120037078857, "logits/rejected": -3.1227214336395264, "logps/chosen": -132.56565856933594, "logps/rejected": -178.5340576171875, "loss": 0.5921, "rewards/accuracies": 0.75, "rewards/chosen": -0.5090728998184204, "rewards/margins": 0.8391250967979431, "rewards/rejected": -1.3481979370117188, "step": 4357 }, { "epoch": 0.5, "learning_rate": 1.515626828982793e-07, "logits/chosen": -2.733494758605957, "logits/rejected": -2.66791033744812, "logps/chosen": -274.7735595703125, "logps/rejected": -298.78363037109375, "loss": 0.2522, "rewards/accuracies": 1.0, "rewards/chosen": 0.1007273867726326, "rewards/margins": 1.97842538356781, "rewards/rejected": -1.8776981830596924, "step": 4358 }, { "epoch": 0.5, "learning_rate": 1.5152756642865502e-07, "logits/chosen": -3.2907021045684814, "logits/rejected": -3.1837379932403564, "logps/chosen": -131.64645385742188, "logps/rejected": -244.5100860595703, "loss": 0.2827, "rewards/accuracies": 1.0, "rewards/chosen": -0.09286855161190033, "rewards/margins": 1.8562700748443604, "rewards/rejected": -1.9491386413574219, "step": 4359 }, { "epoch": 0.5, "learning_rate": 1.5149244995903077e-07, "logits/chosen": -3.263777732849121, "logits/rejected": -3.0694971084594727, "logps/chosen": -238.61192321777344, "logps/rejected": -164.87884521484375, "loss": 0.6013, "rewards/accuracies": 0.75, "rewards/chosen": -0.23120154440402985, "rewards/margins": 0.8004652261734009, "rewards/rejected": -1.0316667556762695, "step": 4360 }, { "epoch": 0.5, "learning_rate": 1.5145733348940653e-07, "logits/chosen": -2.444157123565674, "logits/rejected": -2.5378336906433105, "logps/chosen": -201.5517120361328, "logps/rejected": -304.38702392578125, "loss": 0.2951, "rewards/accuracies": 0.875, "rewards/chosen": 0.35066503286361694, "rewards/margins": 2.5386009216308594, "rewards/rejected": -2.1879358291625977, "step": 4361 }, { "epoch": 0.5, "learning_rate": 1.5142221701978225e-07, "logits/chosen": -2.8233203887939453, "logits/rejected": -2.3043649196624756, "logps/chosen": -244.50408935546875, "logps/rejected": -249.8031005859375, "loss": 0.8239, "rewards/accuracies": 0.5, "rewards/chosen": -0.23058897256851196, "rewards/margins": 0.511406421661377, "rewards/rejected": -0.7419954538345337, "step": 4362 }, { "epoch": 0.5, "learning_rate": 1.51387100550158e-07, "logits/chosen": -2.7693850994110107, "logits/rejected": -2.70414400100708, "logps/chosen": -248.0126953125, "logps/rejected": -299.70587158203125, "loss": 0.2925, "rewards/accuracies": 0.875, "rewards/chosen": 0.12779343128204346, "rewards/margins": 1.714442253112793, "rewards/rejected": -1.586648941040039, "step": 4363 }, { "epoch": 0.5, "learning_rate": 1.513519840805338e-07, "logits/chosen": -2.958725690841675, "logits/rejected": -2.4526329040527344, "logps/chosen": -279.5599670410156, "logps/rejected": -218.04818725585938, "loss": 0.4381, "rewards/accuracies": 0.875, "rewards/chosen": -0.46563881635665894, "rewards/margins": 1.102436900138855, "rewards/rejected": -1.5680756568908691, "step": 4364 }, { "epoch": 0.5, "learning_rate": 1.5131686761090951e-07, "logits/chosen": -4.099704742431641, "logits/rejected": -3.954615354537964, "logps/chosen": -390.666259765625, "logps/rejected": -342.74322509765625, "loss": 0.2235, "rewards/accuracies": 0.875, "rewards/chosen": 0.15247058868408203, "rewards/margins": 2.370198965072632, "rewards/rejected": -2.217728614807129, "step": 4365 }, { "epoch": 0.5, "learning_rate": 1.5128175114128527e-07, "logits/chosen": -3.5607848167419434, "logits/rejected": -3.17744779586792, "logps/chosen": -209.37461853027344, "logps/rejected": -261.11126708984375, "loss": 0.746, "rewards/accuracies": 0.75, "rewards/chosen": -0.43594038486480713, "rewards/margins": 0.322238951921463, "rewards/rejected": -0.7581793665885925, "step": 4366 }, { "epoch": 0.5, "learning_rate": 1.51246634671661e-07, "logits/chosen": -3.3152482509613037, "logits/rejected": -3.2359282970428467, "logps/chosen": -424.8460388183594, "logps/rejected": -372.7846374511719, "loss": 0.3487, "rewards/accuracies": 0.75, "rewards/chosen": -0.08495499193668365, "rewards/margins": 2.606382369995117, "rewards/rejected": -2.6913375854492188, "step": 4367 }, { "epoch": 0.5, "learning_rate": 1.5121151820203675e-07, "logits/chosen": -3.177506446838379, "logits/rejected": -3.040463924407959, "logps/chosen": -370.3385925292969, "logps/rejected": -255.0132293701172, "loss": 0.7806, "rewards/accuracies": 0.625, "rewards/chosen": -0.143892303109169, "rewards/margins": 0.7528940439224243, "rewards/rejected": -0.8967862725257874, "step": 4368 }, { "epoch": 0.5, "learning_rate": 1.511764017324125e-07, "logits/chosen": -2.9256277084350586, "logits/rejected": -2.79984450340271, "logps/chosen": -167.287353515625, "logps/rejected": -144.92601013183594, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": -0.27300214767456055, "rewards/margins": 2.126824378967285, "rewards/rejected": -2.3998262882232666, "step": 4369 }, { "epoch": 0.5, "learning_rate": 1.5114128526278823e-07, "logits/chosen": -3.4003474712371826, "logits/rejected": -3.333348035812378, "logps/chosen": -208.35316467285156, "logps/rejected": -236.85565185546875, "loss": 0.3747, "rewards/accuracies": 0.75, "rewards/chosen": -0.07052738964557648, "rewards/margins": 1.8659526109695435, "rewards/rejected": -1.936479926109314, "step": 4370 }, { "epoch": 0.5, "learning_rate": 1.5110616879316399e-07, "logits/chosen": -3.586909055709839, "logits/rejected": -3.4962854385375977, "logps/chosen": -311.3375549316406, "logps/rejected": -300.5721435546875, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": -0.27034351229667664, "rewards/margins": 2.939575672149658, "rewards/rejected": -3.2099194526672363, "step": 4371 }, { "epoch": 0.5, "learning_rate": 1.5107105232353974e-07, "logits/chosen": -3.6852259635925293, "logits/rejected": -3.8589088916778564, "logps/chosen": -217.17784118652344, "logps/rejected": -229.31895446777344, "loss": 0.3724, "rewards/accuracies": 0.875, "rewards/chosen": -0.5467852354049683, "rewards/margins": 2.5945963859558105, "rewards/rejected": -3.1413817405700684, "step": 4372 }, { "epoch": 0.5, "learning_rate": 1.5103593585391547e-07, "logits/chosen": -3.1161463260650635, "logits/rejected": -3.361480474472046, "logps/chosen": -212.10824584960938, "logps/rejected": -285.73980712890625, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": -0.25610148906707764, "rewards/margins": 1.057396411895752, "rewards/rejected": -1.3134979009628296, "step": 4373 }, { "epoch": 0.5, "learning_rate": 1.5100081938429125e-07, "logits/chosen": -2.880406379699707, "logits/rejected": -2.973616361618042, "logps/chosen": -232.09947204589844, "logps/rejected": -239.45208740234375, "loss": 0.1625, "rewards/accuracies": 1.0, "rewards/chosen": 0.39500609040260315, "rewards/margins": 2.0360450744628906, "rewards/rejected": -1.641039252281189, "step": 4374 }, { "epoch": 0.5, "learning_rate": 1.5096570291466695e-07, "logits/chosen": -3.364504337310791, "logits/rejected": -2.9983022212982178, "logps/chosen": -332.7726135253906, "logps/rejected": -205.62091064453125, "loss": 0.7572, "rewards/accuracies": 0.75, "rewards/chosen": -0.37113887071609497, "rewards/margins": 1.4453901052474976, "rewards/rejected": -1.8165290355682373, "step": 4375 }, { "epoch": 0.5, "learning_rate": 1.5093058644504273e-07, "logits/chosen": -3.013300895690918, "logits/rejected": -3.1054279804229736, "logps/chosen": -268.9093322753906, "logps/rejected": -321.533935546875, "loss": 0.7096, "rewards/accuracies": 0.75, "rewards/chosen": -0.377937376499176, "rewards/margins": 1.3101989030838013, "rewards/rejected": -1.688136339187622, "step": 4376 }, { "epoch": 0.5, "learning_rate": 1.5089546997541848e-07, "logits/chosen": -3.387188673019409, "logits/rejected": -3.7440662384033203, "logps/chosen": -219.77719116210938, "logps/rejected": -246.3402557373047, "loss": 0.5868, "rewards/accuracies": 0.625, "rewards/chosen": -0.4854031503200531, "rewards/margins": 1.3160405158996582, "rewards/rejected": -1.8014435768127441, "step": 4377 }, { "epoch": 0.5, "learning_rate": 1.508603535057942e-07, "logits/chosen": -2.4300150871276855, "logits/rejected": -2.282163143157959, "logps/chosen": -206.68984985351562, "logps/rejected": -337.49383544921875, "loss": 0.5565, "rewards/accuracies": 0.75, "rewards/chosen": 0.01374092698097229, "rewards/margins": 1.552530288696289, "rewards/rejected": -1.5387895107269287, "step": 4378 }, { "epoch": 0.5, "learning_rate": 1.5082523703616996e-07, "logits/chosen": -3.096707820892334, "logits/rejected": -3.152357816696167, "logps/chosen": -221.82876586914062, "logps/rejected": -132.79945373535156, "loss": 0.4855, "rewards/accuracies": 0.625, "rewards/chosen": 0.05497577786445618, "rewards/margins": 1.5173349380493164, "rewards/rejected": -1.4623591899871826, "step": 4379 }, { "epoch": 0.5, "learning_rate": 1.507901205665457e-07, "logits/chosen": -3.2750179767608643, "logits/rejected": -3.3553595542907715, "logps/chosen": -245.12274169921875, "logps/rejected": -223.70758056640625, "loss": 0.5897, "rewards/accuracies": 0.625, "rewards/chosen": -0.1376083791255951, "rewards/margins": 0.8249518871307373, "rewards/rejected": -0.9625602960586548, "step": 4380 }, { "epoch": 0.51, "learning_rate": 1.5075500409692144e-07, "logits/chosen": -2.7313644886016846, "logits/rejected": -2.7743587493896484, "logps/chosen": -540.2749633789062, "logps/rejected": -460.87469482421875, "loss": 0.3841, "rewards/accuracies": 0.875, "rewards/chosen": -0.19075126945972443, "rewards/margins": 2.1861088275909424, "rewards/rejected": -2.3768601417541504, "step": 4381 }, { "epoch": 0.51, "learning_rate": 1.507198876272972e-07, "logits/chosen": -2.812267303466797, "logits/rejected": -2.6020193099975586, "logps/chosen": -294.84356689453125, "logps/rejected": -347.6492919921875, "loss": 0.5273, "rewards/accuracies": 0.625, "rewards/chosen": -0.005048975348472595, "rewards/margins": 1.069933295249939, "rewards/rejected": -1.0749822854995728, "step": 4382 }, { "epoch": 0.51, "learning_rate": 1.5068477115767293e-07, "logits/chosen": -2.158843755722046, "logits/rejected": -1.9106693267822266, "logps/chosen": -303.96807861328125, "logps/rejected": -395.33856201171875, "loss": 0.7692, "rewards/accuracies": 0.5, "rewards/chosen": 0.22693565487861633, "rewards/margins": 1.0839990377426147, "rewards/rejected": -0.8570634126663208, "step": 4383 }, { "epoch": 0.51, "learning_rate": 1.5064965468804868e-07, "logits/chosen": -3.5930843353271484, "logits/rejected": -3.3900411128997803, "logps/chosen": -287.32232666015625, "logps/rejected": -253.5941925048828, "loss": 0.1538, "rewards/accuracies": 1.0, "rewards/chosen": 0.6565792560577393, "rewards/margins": 3.5085532665252686, "rewards/rejected": -2.8519740104675293, "step": 4384 }, { "epoch": 0.51, "learning_rate": 1.5061453821842446e-07, "logits/chosen": -3.605456829071045, "logits/rejected": -3.0852580070495605, "logps/chosen": -230.35739135742188, "logps/rejected": -126.30648803710938, "loss": 0.6576, "rewards/accuracies": 0.625, "rewards/chosen": -0.41916847229003906, "rewards/margins": 0.625045120716095, "rewards/rejected": -1.0442136526107788, "step": 4385 }, { "epoch": 0.51, "learning_rate": 1.5057942174880016e-07, "logits/chosen": -3.6225457191467285, "logits/rejected": -3.457719087600708, "logps/chosen": -388.4093017578125, "logps/rejected": -215.0829620361328, "loss": 0.4125, "rewards/accuracies": 0.75, "rewards/chosen": 0.18484365940093994, "rewards/margins": 1.8490149974822998, "rewards/rejected": -1.6641713380813599, "step": 4386 }, { "epoch": 0.51, "learning_rate": 1.5054430527917594e-07, "logits/chosen": -2.8146331310272217, "logits/rejected": -3.00343656539917, "logps/chosen": -351.9966735839844, "logps/rejected": -319.5005187988281, "loss": 0.4834, "rewards/accuracies": 0.75, "rewards/chosen": -0.8603094220161438, "rewards/margins": 0.8501147031784058, "rewards/rejected": -1.7104240655899048, "step": 4387 }, { "epoch": 0.51, "learning_rate": 1.5050918880955167e-07, "logits/chosen": -3.8175971508026123, "logits/rejected": -3.799285411834717, "logps/chosen": -248.61203002929688, "logps/rejected": -258.4638366699219, "loss": 0.4995, "rewards/accuracies": 0.75, "rewards/chosen": -0.4062659442424774, "rewards/margins": 0.9454419016838074, "rewards/rejected": -1.3517078161239624, "step": 4388 }, { "epoch": 0.51, "learning_rate": 1.5047407233992742e-07, "logits/chosen": -3.2852892875671387, "logits/rejected": -3.452378034591675, "logps/chosen": -140.73739624023438, "logps/rejected": -175.56150817871094, "loss": 0.4708, "rewards/accuracies": 0.625, "rewards/chosen": -0.22385556995868683, "rewards/margins": 2.776371955871582, "rewards/rejected": -3.000227689743042, "step": 4389 }, { "epoch": 0.51, "learning_rate": 1.5043895587030318e-07, "logits/chosen": -3.8097360134124756, "logits/rejected": -3.8893585205078125, "logps/chosen": -186.71109008789062, "logps/rejected": -225.67286682128906, "loss": 0.418, "rewards/accuracies": 0.625, "rewards/chosen": -0.3583275079727173, "rewards/margins": 1.45175039768219, "rewards/rejected": -1.8100779056549072, "step": 4390 }, { "epoch": 0.51, "learning_rate": 1.504038394006789e-07, "logits/chosen": -2.8321709632873535, "logits/rejected": -2.5557775497436523, "logps/chosen": -262.4767150878906, "logps/rejected": -282.2918395996094, "loss": 0.852, "rewards/accuracies": 0.625, "rewards/chosen": -0.13637547194957733, "rewards/margins": 0.42150604724884033, "rewards/rejected": -0.5578815340995789, "step": 4391 }, { "epoch": 0.51, "learning_rate": 1.5036872293105466e-07, "logits/chosen": -3.213109254837036, "logits/rejected": -3.2657155990600586, "logps/chosen": -248.5978546142578, "logps/rejected": -132.88436889648438, "loss": 0.3951, "rewards/accuracies": 0.875, "rewards/chosen": 0.183022603392601, "rewards/margins": 1.1139625310897827, "rewards/rejected": -0.9309399724006653, "step": 4392 }, { "epoch": 0.51, "learning_rate": 1.503336064614304e-07, "logits/chosen": -3.4584922790527344, "logits/rejected": -3.479621171951294, "logps/chosen": -322.09735107421875, "logps/rejected": -275.2425537109375, "loss": 0.3798, "rewards/accuracies": 0.625, "rewards/chosen": -0.16465619206428528, "rewards/margins": 1.8723613023757935, "rewards/rejected": -2.037017583847046, "step": 4393 }, { "epoch": 0.51, "learning_rate": 1.5029848999180614e-07, "logits/chosen": -3.163559675216675, "logits/rejected": -2.973947048187256, "logps/chosen": -361.8778991699219, "logps/rejected": -230.2376708984375, "loss": 0.5491, "rewards/accuracies": 0.625, "rewards/chosen": -0.486987441778183, "rewards/margins": 1.4815021753311157, "rewards/rejected": -1.968489646911621, "step": 4394 }, { "epoch": 0.51, "learning_rate": 1.502633735221819e-07, "logits/chosen": -3.8698008060455322, "logits/rejected": -3.915782928466797, "logps/chosen": -191.20315551757812, "logps/rejected": -270.44012451171875, "loss": 0.4353, "rewards/accuracies": 0.625, "rewards/chosen": 0.322562575340271, "rewards/margins": 1.7500865459442139, "rewards/rejected": -1.4275238513946533, "step": 4395 }, { "epoch": 0.51, "learning_rate": 1.5022825705255762e-07, "logits/chosen": -2.4990415573120117, "logits/rejected": -2.477761745452881, "logps/chosen": -316.3808288574219, "logps/rejected": -283.4256591796875, "loss": 0.5598, "rewards/accuracies": 0.75, "rewards/chosen": -0.8832269906997681, "rewards/margins": 1.0751512050628662, "rewards/rejected": -1.9583783149719238, "step": 4396 }, { "epoch": 0.51, "learning_rate": 1.5019314058293337e-07, "logits/chosen": -2.932175636291504, "logits/rejected": -2.7984063625335693, "logps/chosen": -375.29150390625, "logps/rejected": -345.2872314453125, "loss": 0.5167, "rewards/accuracies": 0.75, "rewards/chosen": -0.12368771433830261, "rewards/margins": 0.7360104322433472, "rewards/rejected": -0.8596981763839722, "step": 4397 }, { "epoch": 0.51, "learning_rate": 1.5015802411330915e-07, "logits/chosen": -2.7629036903381348, "logits/rejected": -2.5199787616729736, "logps/chosen": -215.64041137695312, "logps/rejected": -272.7957763671875, "loss": 0.0725, "rewards/accuracies": 1.0, "rewards/chosen": 0.8380064368247986, "rewards/margins": 3.336116313934326, "rewards/rejected": -2.498109817504883, "step": 4398 }, { "epoch": 0.51, "learning_rate": 1.5012290764368488e-07, "logits/chosen": -2.8755970001220703, "logits/rejected": -2.979569911956787, "logps/chosen": -180.9604949951172, "logps/rejected": -205.2681884765625, "loss": 0.3172, "rewards/accuracies": 0.75, "rewards/chosen": -0.17452417314052582, "rewards/margins": 2.5646045207977295, "rewards/rejected": -2.739128589630127, "step": 4399 }, { "epoch": 0.51, "learning_rate": 1.5008779117406064e-07, "logits/chosen": -3.3309595584869385, "logits/rejected": -3.5195393562316895, "logps/chosen": -155.01641845703125, "logps/rejected": -241.80455017089844, "loss": 0.3502, "rewards/accuracies": 0.75, "rewards/chosen": 0.2577670216560364, "rewards/margins": 2.0736489295959473, "rewards/rejected": -1.8158820867538452, "step": 4400 }, { "epoch": 0.51, "learning_rate": 1.500526747044364e-07, "logits/chosen": -3.2131705284118652, "logits/rejected": -3.1328415870666504, "logps/chosen": -315.64520263671875, "logps/rejected": -254.4636993408203, "loss": 0.2353, "rewards/accuracies": 1.0, "rewards/chosen": 0.0831565409898758, "rewards/margins": 3.3797080516815186, "rewards/rejected": -3.296551465988159, "step": 4401 }, { "epoch": 0.51, "learning_rate": 1.5001755823481212e-07, "logits/chosen": -3.269278049468994, "logits/rejected": -2.965360403060913, "logps/chosen": -362.98297119140625, "logps/rejected": -238.10060119628906, "loss": 0.4696, "rewards/accuracies": 0.875, "rewards/chosen": 0.24326296150684357, "rewards/margins": 2.0962040424346924, "rewards/rejected": -1.8529409170150757, "step": 4402 }, { "epoch": 0.51, "learning_rate": 1.4998244176518787e-07, "logits/chosen": -3.8166439533233643, "logits/rejected": -3.814970016479492, "logps/chosen": -232.79920959472656, "logps/rejected": -221.6922607421875, "loss": 0.4514, "rewards/accuracies": 0.875, "rewards/chosen": -0.46532317996025085, "rewards/margins": 2.42971134185791, "rewards/rejected": -2.8950347900390625, "step": 4403 }, { "epoch": 0.51, "learning_rate": 1.4994732529556362e-07, "logits/chosen": -3.784109115600586, "logits/rejected": -3.1705081462860107, "logps/chosen": -424.74676513671875, "logps/rejected": -292.21197509765625, "loss": 0.7107, "rewards/accuracies": 0.75, "rewards/chosen": -0.10034163296222687, "rewards/margins": 1.4442434310913086, "rewards/rejected": -1.5445849895477295, "step": 4404 }, { "epoch": 0.51, "learning_rate": 1.4991220882593935e-07, "logits/chosen": -2.2929201126098633, "logits/rejected": -2.325254440307617, "logps/chosen": -525.851806640625, "logps/rejected": -365.5827941894531, "loss": 0.3614, "rewards/accuracies": 0.75, "rewards/chosen": 0.17604303359985352, "rewards/margins": 2.818106174468994, "rewards/rejected": -2.6420631408691406, "step": 4405 }, { "epoch": 0.51, "learning_rate": 1.498770923563151e-07, "logits/chosen": -3.649132251739502, "logits/rejected": -3.6692564487457275, "logps/chosen": -472.29913330078125, "logps/rejected": -265.0448913574219, "loss": 0.4561, "rewards/accuracies": 0.625, "rewards/chosen": -0.8902941942214966, "rewards/margins": 1.2425613403320312, "rewards/rejected": -2.1328556537628174, "step": 4406 }, { "epoch": 0.51, "learning_rate": 1.4984197588669086e-07, "logits/chosen": -4.007996082305908, "logits/rejected": -4.043598175048828, "logps/chosen": -218.08935546875, "logps/rejected": -215.3282470703125, "loss": 0.4032, "rewards/accuracies": 0.75, "rewards/chosen": -0.3583040237426758, "rewards/margins": 1.7505686283111572, "rewards/rejected": -2.108872413635254, "step": 4407 }, { "epoch": 0.51, "learning_rate": 1.498068594170666e-07, "logits/chosen": -3.574333906173706, "logits/rejected": -3.482656240463257, "logps/chosen": -289.4785461425781, "logps/rejected": -249.06854248046875, "loss": 0.4187, "rewards/accuracies": 0.625, "rewards/chosen": -0.30419886112213135, "rewards/margins": 2.53704833984375, "rewards/rejected": -2.841247320175171, "step": 4408 }, { "epoch": 0.51, "learning_rate": 1.4977174294744234e-07, "logits/chosen": -2.5965232849121094, "logits/rejected": -2.196737766265869, "logps/chosen": -340.62823486328125, "logps/rejected": -303.58209228515625, "loss": 0.3998, "rewards/accuracies": 0.75, "rewards/chosen": 0.35948652029037476, "rewards/margins": 0.894048273563385, "rewards/rejected": -0.5345617532730103, "step": 4409 }, { "epoch": 0.51, "learning_rate": 1.497366264778181e-07, "logits/chosen": -3.259554386138916, "logits/rejected": -3.1776890754699707, "logps/chosen": -169.858154296875, "logps/rejected": -280.4656677246094, "loss": 0.5325, "rewards/accuracies": 0.625, "rewards/chosen": -0.08583186566829681, "rewards/margins": 1.023802399635315, "rewards/rejected": -1.109634280204773, "step": 4410 }, { "epoch": 0.51, "learning_rate": 1.4970151000819385e-07, "logits/chosen": -3.028939723968506, "logits/rejected": -3.0365071296691895, "logps/chosen": -354.32659912109375, "logps/rejected": -307.4539794921875, "loss": 0.3942, "rewards/accuracies": 0.75, "rewards/chosen": 0.018413707613945007, "rewards/margins": 2.7685017585754395, "rewards/rejected": -2.7500882148742676, "step": 4411 }, { "epoch": 0.51, "learning_rate": 1.4966639353856958e-07, "logits/chosen": -3.134254217147827, "logits/rejected": -2.996080160140991, "logps/chosen": -313.74981689453125, "logps/rejected": -473.1878662109375, "loss": 0.625, "rewards/accuracies": 0.625, "rewards/chosen": -0.43938106298446655, "rewards/margins": 1.235554814338684, "rewards/rejected": -1.6749359369277954, "step": 4412 }, { "epoch": 0.51, "learning_rate": 1.4963127706894533e-07, "logits/chosen": -2.737462282180786, "logits/rejected": -2.7807319164276123, "logps/chosen": -222.94297790527344, "logps/rejected": -209.45831298828125, "loss": 0.4198, "rewards/accuracies": 0.875, "rewards/chosen": 0.2705109715461731, "rewards/margins": 0.9310286641120911, "rewards/rejected": -0.660517692565918, "step": 4413 }, { "epoch": 0.51, "learning_rate": 1.4959616059932106e-07, "logits/chosen": -3.1707406044006348, "logits/rejected": -3.258849620819092, "logps/chosen": -340.6451110839844, "logps/rejected": -407.4120788574219, "loss": 0.6579, "rewards/accuracies": 0.625, "rewards/chosen": -0.045621439814567566, "rewards/margins": 0.5628175139427185, "rewards/rejected": -0.6084389686584473, "step": 4414 }, { "epoch": 0.51, "learning_rate": 1.4956104412969684e-07, "logits/chosen": -2.5895838737487793, "logits/rejected": -2.456406593322754, "logps/chosen": -334.995361328125, "logps/rejected": -233.80030822753906, "loss": 0.3257, "rewards/accuracies": 0.875, "rewards/chosen": 0.03696444630622864, "rewards/margins": 1.3640143871307373, "rewards/rejected": -1.3270500898361206, "step": 4415 }, { "epoch": 0.51, "learning_rate": 1.4952592766007256e-07, "logits/chosen": -3.2398808002471924, "logits/rejected": -2.8593647480010986, "logps/chosen": -207.0592041015625, "logps/rejected": -282.97509765625, "loss": 0.1948, "rewards/accuracies": 1.0, "rewards/chosen": -0.039132606238126755, "rewards/margins": 3.950399875640869, "rewards/rejected": -3.989531993865967, "step": 4416 }, { "epoch": 0.51, "learning_rate": 1.4949081119044832e-07, "logits/chosen": -3.310863971710205, "logits/rejected": -2.8487966060638428, "logps/chosen": -237.89346313476562, "logps/rejected": -289.961669921875, "loss": 0.3426, "rewards/accuracies": 0.75, "rewards/chosen": -0.44219154119491577, "rewards/margins": 2.1357641220092773, "rewards/rejected": -2.577955722808838, "step": 4417 }, { "epoch": 0.51, "learning_rate": 1.4945569472082405e-07, "logits/chosen": -2.9670209884643555, "logits/rejected": -2.81146502494812, "logps/chosen": -334.0722961425781, "logps/rejected": -236.03326416015625, "loss": 0.4056, "rewards/accuracies": 0.75, "rewards/chosen": 0.12095295637845993, "rewards/margins": 1.6039036512374878, "rewards/rejected": -1.4829509258270264, "step": 4418 }, { "epoch": 0.51, "learning_rate": 1.4942057825119983e-07, "logits/chosen": -2.8091940879821777, "logits/rejected": -3.009946346282959, "logps/chosen": -359.671142578125, "logps/rejected": -229.96270751953125, "loss": 0.2424, "rewards/accuracies": 0.875, "rewards/chosen": -0.13167595863342285, "rewards/margins": 2.332341194152832, "rewards/rejected": -2.464016914367676, "step": 4419 }, { "epoch": 0.51, "learning_rate": 1.4938546178157555e-07, "logits/chosen": -3.322906494140625, "logits/rejected": -3.467156171798706, "logps/chosen": -132.37025451660156, "logps/rejected": -163.06884765625, "loss": 0.3691, "rewards/accuracies": 0.75, "rewards/chosen": 0.1954316794872284, "rewards/margins": 1.3765482902526855, "rewards/rejected": -1.1811165809631348, "step": 4420 }, { "epoch": 0.51, "learning_rate": 1.493503453119513e-07, "logits/chosen": -2.797746181488037, "logits/rejected": -2.850905418395996, "logps/chosen": -339.642578125, "logps/rejected": -344.678955078125, "loss": 0.3704, "rewards/accuracies": 0.75, "rewards/chosen": 0.2056894302368164, "rewards/margins": 1.4266995191574097, "rewards/rejected": -1.2210102081298828, "step": 4421 }, { "epoch": 0.51, "learning_rate": 1.4931522884232703e-07, "logits/chosen": -2.9927191734313965, "logits/rejected": -2.8240818977355957, "logps/chosen": -232.27395629882812, "logps/rejected": -258.9929504394531, "loss": 0.5626, "rewards/accuracies": 0.875, "rewards/chosen": 0.5572441816329956, "rewards/margins": 0.8226922154426575, "rewards/rejected": -0.26544803380966187, "step": 4422 }, { "epoch": 0.51, "learning_rate": 1.492801123727028e-07, "logits/chosen": -2.2440619468688965, "logits/rejected": -2.090486764907837, "logps/chosen": -279.66473388671875, "logps/rejected": -223.8531494140625, "loss": 0.6709, "rewards/accuracies": 0.75, "rewards/chosen": -0.029772847890853882, "rewards/margins": 0.7437382936477661, "rewards/rejected": -0.7735111117362976, "step": 4423 }, { "epoch": 0.51, "learning_rate": 1.4924499590307854e-07, "logits/chosen": -3.514378786087036, "logits/rejected": -3.6415669918060303, "logps/chosen": -191.73605346679688, "logps/rejected": -198.2625274658203, "loss": 0.2252, "rewards/accuracies": 1.0, "rewards/chosen": -0.4174414575099945, "rewards/margins": 2.6252870559692383, "rewards/rejected": -3.0427284240722656, "step": 4424 }, { "epoch": 0.51, "learning_rate": 1.492098794334543e-07, "logits/chosen": -2.643167018890381, "logits/rejected": -2.650768995285034, "logps/chosen": -149.72711181640625, "logps/rejected": -278.10491943359375, "loss": 0.4056, "rewards/accuracies": 0.75, "rewards/chosen": 0.1489008516073227, "rewards/margins": 2.0988264083862305, "rewards/rejected": -1.949925422668457, "step": 4425 }, { "epoch": 0.51, "learning_rate": 1.4917476296383002e-07, "logits/chosen": -3.0202174186706543, "logits/rejected": -3.0639896392822266, "logps/chosen": -258.9016418457031, "logps/rejected": -243.50985717773438, "loss": 0.3458, "rewards/accuracies": 0.875, "rewards/chosen": -0.22540387511253357, "rewards/margins": 1.4120639562606812, "rewards/rejected": -1.637467861175537, "step": 4426 }, { "epoch": 0.51, "learning_rate": 1.4913964649420578e-07, "logits/chosen": -3.5829193592071533, "logits/rejected": -3.4704067707061768, "logps/chosen": -144.79978942871094, "logps/rejected": -148.05810546875, "loss": 0.9794, "rewards/accuracies": 0.875, "rewards/chosen": -0.8509852290153503, "rewards/margins": 0.4472019672393799, "rewards/rejected": -1.298187017440796, "step": 4427 }, { "epoch": 0.51, "learning_rate": 1.4910453002458153e-07, "logits/chosen": -3.1583967208862305, "logits/rejected": -3.1966285705566406, "logps/chosen": -203.6033477783203, "logps/rejected": -310.8119812011719, "loss": 0.1494, "rewards/accuracies": 0.875, "rewards/chosen": 0.39956507086753845, "rewards/margins": 3.8972878456115723, "rewards/rejected": -3.497722625732422, "step": 4428 }, { "epoch": 0.51, "learning_rate": 1.4906941355495726e-07, "logits/chosen": -3.2788944244384766, "logits/rejected": -3.121509552001953, "logps/chosen": -205.30548095703125, "logps/rejected": -277.1590881347656, "loss": 0.559, "rewards/accuracies": 0.625, "rewards/chosen": -0.1136661097407341, "rewards/margins": 1.0267040729522705, "rewards/rejected": -1.14037024974823, "step": 4429 }, { "epoch": 0.51, "learning_rate": 1.49034297085333e-07, "logits/chosen": -2.9656434059143066, "logits/rejected": -2.999607801437378, "logps/chosen": -332.5215148925781, "logps/rejected": -239.9374542236328, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": 0.4691407084465027, "rewards/margins": 1.0405354499816895, "rewards/rejected": -0.571394681930542, "step": 4430 }, { "epoch": 0.51, "learning_rate": 1.4899918061570874e-07, "logits/chosen": -3.8042986392974854, "logits/rejected": -3.655273675918579, "logps/chosen": -148.4359130859375, "logps/rejected": -181.92172241210938, "loss": 0.4728, "rewards/accuracies": 0.75, "rewards/chosen": 0.149905726313591, "rewards/margins": 0.7844531536102295, "rewards/rejected": -0.6345474720001221, "step": 4431 }, { "epoch": 0.51, "learning_rate": 1.4896406414608452e-07, "logits/chosen": -3.5535364151000977, "logits/rejected": -3.6351075172424316, "logps/chosen": -227.19775390625, "logps/rejected": -273.96795654296875, "loss": 0.2142, "rewards/accuracies": 0.875, "rewards/chosen": 1.1983420848846436, "rewards/margins": 2.7118406295776367, "rewards/rejected": -1.5134987831115723, "step": 4432 }, { "epoch": 0.51, "learning_rate": 1.4892894767646025e-07, "logits/chosen": -3.7133255004882812, "logits/rejected": -4.147195816040039, "logps/chosen": -135.9350128173828, "logps/rejected": -306.511474609375, "loss": 0.4503, "rewards/accuracies": 0.625, "rewards/chosen": -0.6263597011566162, "rewards/margins": 2.3074469566345215, "rewards/rejected": -2.9338066577911377, "step": 4433 }, { "epoch": 0.51, "learning_rate": 1.48893831206836e-07, "logits/chosen": -2.8249757289886475, "logits/rejected": -2.8673744201660156, "logps/chosen": -263.5940246582031, "logps/rejected": -208.982666015625, "loss": 0.5149, "rewards/accuracies": 0.625, "rewards/chosen": 0.1985962837934494, "rewards/margins": 1.2444778680801392, "rewards/rejected": -1.0458815097808838, "step": 4434 }, { "epoch": 0.51, "learning_rate": 1.4885871473721173e-07, "logits/chosen": -2.9143614768981934, "logits/rejected": -2.78123140335083, "logps/chosen": -186.65493774414062, "logps/rejected": -163.7853240966797, "loss": 0.2555, "rewards/accuracies": 0.875, "rewards/chosen": 0.09070964902639389, "rewards/margins": 1.5750693082809448, "rewards/rejected": -1.4843597412109375, "step": 4435 }, { "epoch": 0.51, "learning_rate": 1.488235982675875e-07, "logits/chosen": -3.3565430641174316, "logits/rejected": -3.221827745437622, "logps/chosen": -219.25714111328125, "logps/rejected": -365.2272033691406, "loss": 0.5166, "rewards/accuracies": 0.875, "rewards/chosen": -0.5754996538162231, "rewards/margins": 4.24521017074585, "rewards/rejected": -4.820710182189941, "step": 4436 }, { "epoch": 0.51, "learning_rate": 1.4878848179796324e-07, "logits/chosen": -2.369292974472046, "logits/rejected": -2.383864164352417, "logps/chosen": -231.35989379882812, "logps/rejected": -190.14627075195312, "loss": 0.1785, "rewards/accuracies": 1.0, "rewards/chosen": 0.4834654927253723, "rewards/margins": 2.323647975921631, "rewards/rejected": -1.8401823043823242, "step": 4437 }, { "epoch": 0.51, "learning_rate": 1.48753365328339e-07, "logits/chosen": -2.5285165309906006, "logits/rejected": -2.7143735885620117, "logps/chosen": -338.71844482421875, "logps/rejected": -288.13250732421875, "loss": 0.5167, "rewards/accuracies": 0.875, "rewards/chosen": 0.16128399968147278, "rewards/margins": 1.4340345859527588, "rewards/rejected": -1.2727504968643188, "step": 4438 }, { "epoch": 0.51, "learning_rate": 1.4871824885871472e-07, "logits/chosen": -3.5200295448303223, "logits/rejected": -3.30025053024292, "logps/chosen": -187.4435577392578, "logps/rejected": -214.3146514892578, "loss": 0.1836, "rewards/accuracies": 1.0, "rewards/chosen": 0.06196771562099457, "rewards/margins": 2.5855486392974854, "rewards/rejected": -2.52358078956604, "step": 4439 }, { "epoch": 0.51, "learning_rate": 1.4868313238909047e-07, "logits/chosen": -2.809601306915283, "logits/rejected": -2.520714044570923, "logps/chosen": -244.7889404296875, "logps/rejected": -239.95440673828125, "loss": 0.3179, "rewards/accuracies": 0.875, "rewards/chosen": -0.2035580575466156, "rewards/margins": 1.294797658920288, "rewards/rejected": -1.498355746269226, "step": 4440 }, { "epoch": 0.51, "learning_rate": 1.4864801591946623e-07, "logits/chosen": -3.6114373207092285, "logits/rejected": -3.5216073989868164, "logps/chosen": -219.03671264648438, "logps/rejected": -265.8146667480469, "loss": 0.5364, "rewards/accuracies": 0.625, "rewards/chosen": -0.6353132724761963, "rewards/margins": 0.6936151385307312, "rewards/rejected": -1.3289283514022827, "step": 4441 }, { "epoch": 0.51, "learning_rate": 1.4861289944984198e-07, "logits/chosen": -3.6401960849761963, "logits/rejected": -3.621309995651245, "logps/chosen": -278.2627868652344, "logps/rejected": -198.7195281982422, "loss": 0.5203, "rewards/accuracies": 0.875, "rewards/chosen": -0.21571093797683716, "rewards/margins": 1.6665693521499634, "rewards/rejected": -1.8822803497314453, "step": 4442 }, { "epoch": 0.51, "learning_rate": 1.485777829802177e-07, "logits/chosen": -2.9097719192504883, "logits/rejected": -2.8784117698669434, "logps/chosen": -183.85911560058594, "logps/rejected": -159.04727172851562, "loss": 0.6063, "rewards/accuracies": 0.625, "rewards/chosen": -0.05276265740394592, "rewards/margins": 0.836016058921814, "rewards/rejected": -0.8887786865234375, "step": 4443 }, { "epoch": 0.51, "learning_rate": 1.4854266651059346e-07, "logits/chosen": -2.7692065238952637, "logits/rejected": -2.798689842224121, "logps/chosen": -208.16793823242188, "logps/rejected": -201.27383422851562, "loss": 0.4149, "rewards/accuracies": 0.75, "rewards/chosen": 0.03829929232597351, "rewards/margins": 0.8330237865447998, "rewards/rejected": -0.7947244644165039, "step": 4444 }, { "epoch": 0.51, "learning_rate": 1.4850755004096921e-07, "logits/chosen": -2.9386284351348877, "logits/rejected": -3.0295722484588623, "logps/chosen": -322.6251525878906, "logps/rejected": -317.8377380371094, "loss": 0.2728, "rewards/accuracies": 0.875, "rewards/chosen": -0.09657379984855652, "rewards/margins": 1.689000129699707, "rewards/rejected": -1.785573959350586, "step": 4445 }, { "epoch": 0.51, "learning_rate": 1.4847243357134494e-07, "logits/chosen": -2.327707290649414, "logits/rejected": -2.397747039794922, "logps/chosen": -198.6548614501953, "logps/rejected": -342.28570556640625, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": 0.4047154188156128, "rewards/margins": 4.239616394042969, "rewards/rejected": -3.8349013328552246, "step": 4446 }, { "epoch": 0.51, "learning_rate": 1.484373171017207e-07, "logits/chosen": -3.198585033416748, "logits/rejected": -2.9963932037353516, "logps/chosen": -290.19500732421875, "logps/rejected": -237.94703674316406, "loss": 0.2749, "rewards/accuracies": 0.875, "rewards/chosen": -0.17651121318340302, "rewards/margins": 2.323784589767456, "rewards/rejected": -2.500295639038086, "step": 4447 }, { "epoch": 0.51, "learning_rate": 1.4840220063209645e-07, "logits/chosen": -2.4866113662719727, "logits/rejected": -2.4639556407928467, "logps/chosen": -422.8250732421875, "logps/rejected": -441.9895324707031, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": 0.4765273928642273, "rewards/margins": 2.7117953300476074, "rewards/rejected": -2.2352683544158936, "step": 4448 }, { "epoch": 0.51, "learning_rate": 1.483670841624722e-07, "logits/chosen": -2.889845609664917, "logits/rejected": -3.0506131649017334, "logps/chosen": -238.18173217773438, "logps/rejected": -256.6129150390625, "loss": 0.4197, "rewards/accuracies": 0.75, "rewards/chosen": 0.0840979516506195, "rewards/margins": 1.2450153827667236, "rewards/rejected": -1.1609172821044922, "step": 4449 }, { "epoch": 0.51, "learning_rate": 1.4833196769284793e-07, "logits/chosen": -2.688924789428711, "logits/rejected": -2.687879800796509, "logps/chosen": -293.0924072265625, "logps/rejected": -254.48776245117188, "loss": 0.5309, "rewards/accuracies": 0.625, "rewards/chosen": -0.37013405561447144, "rewards/margins": 1.4146028757095337, "rewards/rejected": -1.7847368717193604, "step": 4450 }, { "epoch": 0.51, "learning_rate": 1.4829685122322368e-07, "logits/chosen": -2.7830095291137695, "logits/rejected": -2.76973295211792, "logps/chosen": -492.3653564453125, "logps/rejected": -217.7748260498047, "loss": 0.3886, "rewards/accuracies": 0.875, "rewards/chosen": -0.9104022979736328, "rewards/margins": 0.9988981485366821, "rewards/rejected": -1.909300446510315, "step": 4451 }, { "epoch": 0.51, "learning_rate": 1.4826173475359944e-07, "logits/chosen": -2.7749366760253906, "logits/rejected": -2.8776772022247314, "logps/chosen": -254.55068969726562, "logps/rejected": -160.39788818359375, "loss": 0.4646, "rewards/accuracies": 0.625, "rewards/chosen": -0.548332154750824, "rewards/margins": 1.146446704864502, "rewards/rejected": -1.6947788000106812, "step": 4452 }, { "epoch": 0.51, "learning_rate": 1.482266182839752e-07, "logits/chosen": -2.840045690536499, "logits/rejected": -2.918057918548584, "logps/chosen": -194.0352020263672, "logps/rejected": -235.64617919921875, "loss": 0.2706, "rewards/accuracies": 0.875, "rewards/chosen": 0.32584789395332336, "rewards/margins": 2.1029727458953857, "rewards/rejected": -1.7771248817443848, "step": 4453 }, { "epoch": 0.51, "learning_rate": 1.4819150181435092e-07, "logits/chosen": -3.4630203247070312, "logits/rejected": -3.376509189605713, "logps/chosen": -425.3464050292969, "logps/rejected": -304.2779235839844, "loss": 0.39, "rewards/accuracies": 0.75, "rewards/chosen": -0.6722319722175598, "rewards/margins": 1.1780495643615723, "rewards/rejected": -1.8502817153930664, "step": 4454 }, { "epoch": 0.51, "learning_rate": 1.4815638534472667e-07, "logits/chosen": -2.607647657394409, "logits/rejected": -2.6655056476593018, "logps/chosen": -325.3609924316406, "logps/rejected": -286.13543701171875, "loss": 0.2558, "rewards/accuracies": 0.875, "rewards/chosen": 0.08824195712804794, "rewards/margins": 2.7558321952819824, "rewards/rejected": -2.6675901412963867, "step": 4455 }, { "epoch": 0.51, "learning_rate": 1.4812126887510243e-07, "logits/chosen": -3.135873794555664, "logits/rejected": -3.3056201934814453, "logps/chosen": -245.12570190429688, "logps/rejected": -264.8995361328125, "loss": 0.6259, "rewards/accuracies": 0.75, "rewards/chosen": -0.38996973633766174, "rewards/margins": 2.7337241172790527, "rewards/rejected": -3.1236939430236816, "step": 4456 }, { "epoch": 0.51, "learning_rate": 1.4808615240547815e-07, "logits/chosen": -3.240366220474243, "logits/rejected": -3.0487334728240967, "logps/chosen": -342.52850341796875, "logps/rejected": -277.62335205078125, "loss": 0.3061, "rewards/accuracies": 0.875, "rewards/chosen": 0.6486632823944092, "rewards/margins": 2.538386106491089, "rewards/rejected": -1.8897227048873901, "step": 4457 }, { "epoch": 0.51, "learning_rate": 1.480510359358539e-07, "logits/chosen": -3.3613197803497314, "logits/rejected": -3.5905370712280273, "logps/chosen": -475.66656494140625, "logps/rejected": -372.38623046875, "loss": 0.2048, "rewards/accuracies": 0.875, "rewards/chosen": 0.0389859601855278, "rewards/margins": 2.8403737545013428, "rewards/rejected": -2.8013877868652344, "step": 4458 }, { "epoch": 0.51, "learning_rate": 1.4801591946622966e-07, "logits/chosen": -3.6479711532592773, "logits/rejected": -3.561401128768921, "logps/chosen": -169.01539611816406, "logps/rejected": -173.09194946289062, "loss": 0.5249, "rewards/accuracies": 0.625, "rewards/chosen": 0.009226039052009583, "rewards/margins": 1.0817134380340576, "rewards/rejected": -1.0724873542785645, "step": 4459 }, { "epoch": 0.51, "learning_rate": 1.4798080299660542e-07, "logits/chosen": -2.7888941764831543, "logits/rejected": -2.960435152053833, "logps/chosen": -328.793212890625, "logps/rejected": -252.36912536621094, "loss": 0.1952, "rewards/accuracies": 0.875, "rewards/chosen": 0.2533559203147888, "rewards/margins": 2.528257131576538, "rewards/rejected": -2.2749011516571045, "step": 4460 }, { "epoch": 0.51, "learning_rate": 1.4794568652698114e-07, "logits/chosen": -2.904768466949463, "logits/rejected": -2.7378928661346436, "logps/chosen": -543.7408447265625, "logps/rejected": -370.409423828125, "loss": 0.2415, "rewards/accuracies": 0.875, "rewards/chosen": 0.3002813458442688, "rewards/margins": 2.6945126056671143, "rewards/rejected": -2.3942313194274902, "step": 4461 }, { "epoch": 0.51, "learning_rate": 1.479105700573569e-07, "logits/chosen": -2.733698606491089, "logits/rejected": -3.0256755352020264, "logps/chosen": -280.9068908691406, "logps/rejected": -342.5966796875, "loss": 0.3081, "rewards/accuracies": 0.875, "rewards/chosen": -0.24758386611938477, "rewards/margins": 2.0729103088378906, "rewards/rejected": -2.3204944133758545, "step": 4462 }, { "epoch": 0.51, "learning_rate": 1.4787545358773262e-07, "logits/chosen": -2.533634662628174, "logits/rejected": -2.7438161373138428, "logps/chosen": -432.3868713378906, "logps/rejected": -353.08721923828125, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": 0.3993610739707947, "rewards/margins": 2.7170588970184326, "rewards/rejected": -2.317697763442993, "step": 4463 }, { "epoch": 0.51, "learning_rate": 1.478403371181084e-07, "logits/chosen": -3.056331157684326, "logits/rejected": -2.744035005569458, "logps/chosen": -317.1224670410156, "logps/rejected": -314.74591064453125, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": 0.13479551672935486, "rewards/margins": 1.9855000972747803, "rewards/rejected": -1.8507044315338135, "step": 4464 }, { "epoch": 0.51, "learning_rate": 1.4780522064848413e-07, "logits/chosen": -3.764188528060913, "logits/rejected": -3.520798921585083, "logps/chosen": -320.7261962890625, "logps/rejected": -232.54324340820312, "loss": 0.5701, "rewards/accuracies": 0.75, "rewards/chosen": -0.7982425093650818, "rewards/margins": 1.206192970275879, "rewards/rejected": -2.0044355392456055, "step": 4465 }, { "epoch": 0.51, "learning_rate": 1.4777010417885989e-07, "logits/chosen": -2.884833335876465, "logits/rejected": -2.7404165267944336, "logps/chosen": -539.05810546875, "logps/rejected": -169.5865478515625, "loss": 0.3276, "rewards/accuracies": 1.0, "rewards/chosen": -0.5535966157913208, "rewards/margins": 1.5286110639572144, "rewards/rejected": -2.082207679748535, "step": 4466 }, { "epoch": 0.51, "learning_rate": 1.4773498770923561e-07, "logits/chosen": -3.553219795227051, "logits/rejected": -3.366637706756592, "logps/chosen": -201.02578735351562, "logps/rejected": -165.07138061523438, "loss": 0.3506, "rewards/accuracies": 0.875, "rewards/chosen": -0.42999449372291565, "rewards/margins": 1.5295319557189941, "rewards/rejected": -1.959526538848877, "step": 4467 }, { "epoch": 0.52, "learning_rate": 1.4769987123961137e-07, "logits/chosen": -2.861527442932129, "logits/rejected": -3.08376407623291, "logps/chosen": -224.36859130859375, "logps/rejected": -289.195068359375, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": 0.7883998155593872, "rewards/margins": 3.3427419662475586, "rewards/rejected": -2.554342269897461, "step": 4468 }, { "epoch": 0.52, "learning_rate": 1.4766475476998712e-07, "logits/chosen": -2.6032204627990723, "logits/rejected": -2.862846851348877, "logps/chosen": -350.8592529296875, "logps/rejected": -235.1274871826172, "loss": 0.2162, "rewards/accuracies": 1.0, "rewards/chosen": 0.14778056740760803, "rewards/margins": 2.2069203853607178, "rewards/rejected": -2.0591397285461426, "step": 4469 }, { "epoch": 0.52, "learning_rate": 1.4762963830036288e-07, "logits/chosen": -2.4603216648101807, "logits/rejected": -2.3557114601135254, "logps/chosen": -333.974609375, "logps/rejected": -322.43310546875, "loss": 0.3373, "rewards/accuracies": 0.875, "rewards/chosen": -0.2209373563528061, "rewards/margins": 1.2770605087280273, "rewards/rejected": -1.497997760772705, "step": 4470 }, { "epoch": 0.52, "learning_rate": 1.475945218307386e-07, "logits/chosen": -3.36956787109375, "logits/rejected": -3.2256016731262207, "logps/chosen": -203.27102661132812, "logps/rejected": -193.315673828125, "loss": 0.24, "rewards/accuracies": 0.875, "rewards/chosen": 0.49033570289611816, "rewards/margins": 1.9732637405395508, "rewards/rejected": -1.4829280376434326, "step": 4471 }, { "epoch": 0.52, "learning_rate": 1.4755940536111436e-07, "logits/chosen": -2.995180606842041, "logits/rejected": -2.9362497329711914, "logps/chosen": -256.50396728515625, "logps/rejected": -248.5731201171875, "loss": 0.6174, "rewards/accuracies": 0.625, "rewards/chosen": 0.3274630904197693, "rewards/margins": 1.0640506744384766, "rewards/rejected": -0.736587643623352, "step": 4472 }, { "epoch": 0.52, "learning_rate": 1.475242888914901e-07, "logits/chosen": -3.0070202350616455, "logits/rejected": -3.336422920227051, "logps/chosen": -244.78758239746094, "logps/rejected": -317.3347473144531, "loss": 0.2423, "rewards/accuracies": 0.875, "rewards/chosen": -0.14586450159549713, "rewards/margins": 4.2756876945495605, "rewards/rejected": -4.421552658081055, "step": 4473 }, { "epoch": 0.52, "learning_rate": 1.4748917242186584e-07, "logits/chosen": -3.2747576236724854, "logits/rejected": -2.99515962600708, "logps/chosen": -261.779052734375, "logps/rejected": -231.59133911132812, "loss": 0.2772, "rewards/accuracies": 1.0, "rewards/chosen": 0.14123916625976562, "rewards/margins": 1.8015648126602173, "rewards/rejected": -1.660325527191162, "step": 4474 }, { "epoch": 0.52, "learning_rate": 1.474540559522416e-07, "logits/chosen": -2.3643627166748047, "logits/rejected": -2.6696486473083496, "logps/chosen": -315.5561828613281, "logps/rejected": -239.1826629638672, "loss": 0.4177, "rewards/accuracies": 0.875, "rewards/chosen": 0.03897427022457123, "rewards/margins": 1.5130833387374878, "rewards/rejected": -1.4741090536117554, "step": 4475 }, { "epoch": 0.52, "learning_rate": 1.4741893948261735e-07, "logits/chosen": -2.7469329833984375, "logits/rejected": -2.8960094451904297, "logps/chosen": -370.9471435546875, "logps/rejected": -243.80166625976562, "loss": 0.4825, "rewards/accuracies": 0.75, "rewards/chosen": 0.17920604348182678, "rewards/margins": 0.9487786889076233, "rewards/rejected": -0.7695726752281189, "step": 4476 }, { "epoch": 0.52, "learning_rate": 1.473838230129931e-07, "logits/chosen": -3.068401336669922, "logits/rejected": -2.94319486618042, "logps/chosen": -274.8902587890625, "logps/rejected": -235.03656005859375, "loss": 0.5592, "rewards/accuracies": 0.625, "rewards/chosen": 0.06936344504356384, "rewards/margins": 0.7675840854644775, "rewards/rejected": -0.6982207298278809, "step": 4477 }, { "epoch": 0.52, "learning_rate": 1.4734870654336883e-07, "logits/chosen": -3.350950241088867, "logits/rejected": -3.365607500076294, "logps/chosen": -321.41217041015625, "logps/rejected": -183.76742553710938, "loss": 0.3296, "rewards/accuracies": 1.0, "rewards/chosen": 0.673633873462677, "rewards/margins": 2.1617050170898438, "rewards/rejected": -1.4880712032318115, "step": 4478 }, { "epoch": 0.52, "learning_rate": 1.4731359007374458e-07, "logits/chosen": -3.223801851272583, "logits/rejected": -3.50046443939209, "logps/chosen": -257.3007507324219, "logps/rejected": -277.92059326171875, "loss": 0.4401, "rewards/accuracies": 0.625, "rewards/chosen": -0.5318021178245544, "rewards/margins": 1.0931155681610107, "rewards/rejected": -1.6249176263809204, "step": 4479 }, { "epoch": 0.52, "learning_rate": 1.472784736041203e-07, "logits/chosen": -3.196894407272339, "logits/rejected": -2.894228935241699, "logps/chosen": -349.4918518066406, "logps/rejected": -169.04286193847656, "loss": 0.2934, "rewards/accuracies": 0.875, "rewards/chosen": 0.3423619866371155, "rewards/margins": 1.5952322483062744, "rewards/rejected": -1.2528700828552246, "step": 4480 }, { "epoch": 0.52, "learning_rate": 1.472433571344961e-07, "logits/chosen": -3.222590923309326, "logits/rejected": -3.051413059234619, "logps/chosen": -309.04803466796875, "logps/rejected": -256.827880859375, "loss": 0.2576, "rewards/accuracies": 1.0, "rewards/chosen": 0.254152774810791, "rewards/margins": 2.37849760055542, "rewards/rejected": -2.124344825744629, "step": 4481 }, { "epoch": 0.52, "learning_rate": 1.4720824066487182e-07, "logits/chosen": -3.0856611728668213, "logits/rejected": -3.1687586307525635, "logps/chosen": -255.06033325195312, "logps/rejected": -169.47666931152344, "loss": 0.4179, "rewards/accuracies": 0.75, "rewards/chosen": -0.26923736929893494, "rewards/margins": 1.7413792610168457, "rewards/rejected": -2.0106165409088135, "step": 4482 }, { "epoch": 0.52, "learning_rate": 1.4717312419524757e-07, "logits/chosen": -3.219921588897705, "logits/rejected": -3.323270320892334, "logps/chosen": -158.9097442626953, "logps/rejected": -189.7758331298828, "loss": 0.5271, "rewards/accuracies": 0.75, "rewards/chosen": -0.4286966025829315, "rewards/margins": 1.1500000953674316, "rewards/rejected": -1.578696608543396, "step": 4483 }, { "epoch": 0.52, "learning_rate": 1.471380077256233e-07, "logits/chosen": -3.181910276412964, "logits/rejected": -3.078341484069824, "logps/chosen": -166.9610137939453, "logps/rejected": -241.27218627929688, "loss": 0.3852, "rewards/accuracies": 0.75, "rewards/chosen": 0.12652593851089478, "rewards/margins": 1.823262095451355, "rewards/rejected": -1.6967360973358154, "step": 4484 }, { "epoch": 0.52, "learning_rate": 1.4710289125599905e-07, "logits/chosen": -3.8667984008789062, "logits/rejected": -3.7569265365600586, "logps/chosen": -224.504638671875, "logps/rejected": -233.3239288330078, "loss": 0.5051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2830770015716553, "rewards/margins": 0.5985735058784485, "rewards/rejected": -0.8816505670547485, "step": 4485 }, { "epoch": 0.52, "learning_rate": 1.470677747863748e-07, "logits/chosen": -1.9860210418701172, "logits/rejected": -2.179534435272217, "logps/chosen": -453.6348876953125, "logps/rejected": -432.6441650390625, "loss": 0.867, "rewards/accuracies": 0.625, "rewards/chosen": -0.30538517236709595, "rewards/margins": 0.22654691338539124, "rewards/rejected": -0.5319320559501648, "step": 4486 }, { "epoch": 0.52, "learning_rate": 1.4703265831675056e-07, "logits/chosen": -3.313593864440918, "logits/rejected": -3.3290834426879883, "logps/chosen": -224.21533203125, "logps/rejected": -281.1157531738281, "loss": 0.976, "rewards/accuracies": 0.625, "rewards/chosen": -0.6812168955802917, "rewards/margins": 0.7177561521530151, "rewards/rejected": -1.398972988128662, "step": 4487 }, { "epoch": 0.52, "learning_rate": 1.4699754184712629e-07, "logits/chosen": -2.922367572784424, "logits/rejected": -2.682837963104248, "logps/chosen": -359.6688232421875, "logps/rejected": -466.0274658203125, "loss": 0.4352, "rewards/accuracies": 0.875, "rewards/chosen": -0.06858719885349274, "rewards/margins": 1.5292675495147705, "rewards/rejected": -1.5978548526763916, "step": 4488 }, { "epoch": 0.52, "learning_rate": 1.4696242537750204e-07, "logits/chosen": -2.973273754119873, "logits/rejected": -3.23939847946167, "logps/chosen": -268.39715576171875, "logps/rejected": -292.90350341796875, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": 0.8730762600898743, "rewards/margins": 2.6055731773376465, "rewards/rejected": -1.732496976852417, "step": 4489 }, { "epoch": 0.52, "learning_rate": 1.469273089078778e-07, "logits/chosen": -3.0905542373657227, "logits/rejected": -3.041985511779785, "logps/chosen": -500.0098876953125, "logps/rejected": -401.12091064453125, "loss": 0.2019, "rewards/accuracies": 0.875, "rewards/chosen": -0.3779023289680481, "rewards/margins": 2.4129090309143066, "rewards/rejected": -2.790811538696289, "step": 4490 }, { "epoch": 0.52, "learning_rate": 1.4689219243825352e-07, "logits/chosen": -2.878777027130127, "logits/rejected": -2.6340315341949463, "logps/chosen": -281.171142578125, "logps/rejected": -233.99771118164062, "loss": 0.5341, "rewards/accuracies": 0.625, "rewards/chosen": -0.4875459671020508, "rewards/margins": 1.1651856899261475, "rewards/rejected": -1.6527316570281982, "step": 4491 }, { "epoch": 0.52, "learning_rate": 1.4685707596862928e-07, "logits/chosen": -3.6824026107788086, "logits/rejected": -3.3325459957122803, "logps/chosen": -158.31658935546875, "logps/rejected": -146.34243774414062, "loss": 0.4291, "rewards/accuracies": 0.75, "rewards/chosen": 0.02467847615480423, "rewards/margins": 1.7604386806488037, "rewards/rejected": -1.73576021194458, "step": 4492 }, { "epoch": 0.52, "learning_rate": 1.4682195949900503e-07, "logits/chosen": -2.7841150760650635, "logits/rejected": -2.8871712684631348, "logps/chosen": -319.2080078125, "logps/rejected": -314.50579833984375, "loss": 0.3568, "rewards/accuracies": 0.75, "rewards/chosen": 0.242509663105011, "rewards/margins": 2.324477195739746, "rewards/rejected": -2.08196759223938, "step": 4493 }, { "epoch": 0.52, "learning_rate": 1.4678684302938078e-07, "logits/chosen": -2.989781141281128, "logits/rejected": -2.7853798866271973, "logps/chosen": -118.13092803955078, "logps/rejected": -172.00779724121094, "loss": 0.4128, "rewards/accuracies": 0.875, "rewards/chosen": -0.006840720772743225, "rewards/margins": 1.9979379177093506, "rewards/rejected": -2.0047786235809326, "step": 4494 }, { "epoch": 0.52, "learning_rate": 1.467517265597565e-07, "logits/chosen": -2.8594789505004883, "logits/rejected": -3.0644407272338867, "logps/chosen": -244.71946716308594, "logps/rejected": -257.74169921875, "loss": 0.2751, "rewards/accuracies": 0.875, "rewards/chosen": 0.018032744526863098, "rewards/margins": 2.1033644676208496, "rewards/rejected": -2.085331439971924, "step": 4495 }, { "epoch": 0.52, "learning_rate": 1.4671661009013226e-07, "logits/chosen": -3.654344320297241, "logits/rejected": -3.5573959350585938, "logps/chosen": -241.46047973632812, "logps/rejected": -173.82058715820312, "loss": 0.4471, "rewards/accuracies": 0.625, "rewards/chosen": -0.3898181915283203, "rewards/margins": 0.94444739818573, "rewards/rejected": -1.3342655897140503, "step": 4496 }, { "epoch": 0.52, "learning_rate": 1.4668149362050802e-07, "logits/chosen": -3.2653326988220215, "logits/rejected": -3.362544059753418, "logps/chosen": -220.3084716796875, "logps/rejected": -197.75144958496094, "loss": 0.6626, "rewards/accuracies": 0.625, "rewards/chosen": -0.7692713737487793, "rewards/margins": 1.0185774564743042, "rewards/rejected": -1.7878488302230835, "step": 4497 }, { "epoch": 0.52, "learning_rate": 1.4664637715088377e-07, "logits/chosen": -3.9611096382141113, "logits/rejected": -3.4598300457000732, "logps/chosen": -260.696533203125, "logps/rejected": -180.54263305664062, "loss": 0.2311, "rewards/accuracies": 0.875, "rewards/chosen": -0.22186364233493805, "rewards/margins": 2.259312152862549, "rewards/rejected": -2.4811758995056152, "step": 4498 }, { "epoch": 0.52, "learning_rate": 1.466112606812595e-07, "logits/chosen": -3.0132431983947754, "logits/rejected": -3.0410525798797607, "logps/chosen": -287.25958251953125, "logps/rejected": -430.99932861328125, "loss": 0.2157, "rewards/accuracies": 1.0, "rewards/chosen": 0.29073768854141235, "rewards/margins": 2.485402822494507, "rewards/rejected": -2.19466495513916, "step": 4499 }, { "epoch": 0.52, "learning_rate": 1.4657614421163525e-07, "logits/chosen": -3.0384936332702637, "logits/rejected": -3.4473626613616943, "logps/chosen": -258.6608581542969, "logps/rejected": -310.5686340332031, "loss": 0.38, "rewards/accuracies": 0.875, "rewards/chosen": -0.3637028932571411, "rewards/margins": 2.8127434253692627, "rewards/rejected": -3.1764464378356934, "step": 4500 }, { "epoch": 0.52, "learning_rate": 1.46541027742011e-07, "logits/chosen": -3.47510027885437, "logits/rejected": -3.4909675121307373, "logps/chosen": -291.4259033203125, "logps/rejected": -274.2096862792969, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.40233972668647766, "rewards/margins": 0.4615073800086975, "rewards/rejected": -0.863847017288208, "step": 4501 }, { "epoch": 0.52, "learning_rate": 1.4650591127238673e-07, "logits/chosen": -3.1235296726226807, "logits/rejected": -2.704055070877075, "logps/chosen": -274.13031005859375, "logps/rejected": -280.25384521484375, "loss": 0.3552, "rewards/accuracies": 0.875, "rewards/chosen": 0.045905157923698425, "rewards/margins": 1.592152714729309, "rewards/rejected": -1.5462477207183838, "step": 4502 }, { "epoch": 0.52, "learning_rate": 1.464707948027625e-07, "logits/chosen": -2.946523666381836, "logits/rejected": -3.1511316299438477, "logps/chosen": -247.1290740966797, "logps/rejected": -217.93673706054688, "loss": 0.3364, "rewards/accuracies": 0.875, "rewards/chosen": -0.17224368453025818, "rewards/margins": 1.4275144338607788, "rewards/rejected": -1.5997581481933594, "step": 4503 }, { "epoch": 0.52, "learning_rate": 1.4643567833313824e-07, "logits/chosen": -2.623812675476074, "logits/rejected": -2.9154510498046875, "logps/chosen": -335.95513916015625, "logps/rejected": -165.47157287597656, "loss": 0.2971, "rewards/accuracies": 0.875, "rewards/chosen": 0.49656814336776733, "rewards/margins": 1.5633776187896729, "rewards/rejected": -1.0668094158172607, "step": 4504 }, { "epoch": 0.52, "learning_rate": 1.46400561863514e-07, "logits/chosen": -3.0457658767700195, "logits/rejected": -3.1613523960113525, "logps/chosen": -369.1893615722656, "logps/rejected": -249.90890502929688, "loss": 0.5557, "rewards/accuracies": 0.75, "rewards/chosen": 0.08801822364330292, "rewards/margins": 0.9505049586296082, "rewards/rejected": -0.862486720085144, "step": 4505 }, { "epoch": 0.52, "learning_rate": 1.4636544539388972e-07, "logits/chosen": -3.5869054794311523, "logits/rejected": -3.865297794342041, "logps/chosen": -144.99798583984375, "logps/rejected": -191.77459716796875, "loss": 0.1866, "rewards/accuracies": 0.875, "rewards/chosen": 0.3276800811290741, "rewards/margins": 3.2599399089813232, "rewards/rejected": -2.9322595596313477, "step": 4506 }, { "epoch": 0.52, "learning_rate": 1.4633032892426548e-07, "logits/chosen": -3.2022783756256104, "logits/rejected": -3.572930335998535, "logps/chosen": -188.64453125, "logps/rejected": -230.6282501220703, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": 0.5223579406738281, "rewards/margins": 2.955181360244751, "rewards/rejected": -2.432823419570923, "step": 4507 }, { "epoch": 0.52, "learning_rate": 1.462952124546412e-07, "logits/chosen": -3.485935688018799, "logits/rejected": -3.160102128982544, "logps/chosen": -363.5455322265625, "logps/rejected": -287.32525634765625, "loss": 0.282, "rewards/accuracies": 0.875, "rewards/chosen": 0.571636974811554, "rewards/margins": 1.8148936033248901, "rewards/rejected": -1.2432568073272705, "step": 4508 }, { "epoch": 0.52, "learning_rate": 1.4626009598501698e-07, "logits/chosen": -2.9633209705352783, "logits/rejected": -2.9464824199676514, "logps/chosen": -273.2046813964844, "logps/rejected": -204.05027770996094, "loss": 0.3751, "rewards/accuracies": 0.875, "rewards/chosen": -0.0629628598690033, "rewards/margins": 1.535428762435913, "rewards/rejected": -1.5983915328979492, "step": 4509 }, { "epoch": 0.52, "learning_rate": 1.462249795153927e-07, "logits/chosen": -3.1914849281311035, "logits/rejected": -2.9607882499694824, "logps/chosen": -296.96063232421875, "logps/rejected": -334.2647705078125, "loss": 0.3228, "rewards/accuracies": 0.875, "rewards/chosen": 0.2850176692008972, "rewards/margins": 3.0599935054779053, "rewards/rejected": -2.7749757766723633, "step": 4510 }, { "epoch": 0.52, "learning_rate": 1.4618986304576847e-07, "logits/chosen": -2.4930849075317383, "logits/rejected": -2.5018396377563477, "logps/chosen": -242.8787078857422, "logps/rejected": -265.0387878417969, "loss": 0.6049, "rewards/accuracies": 0.625, "rewards/chosen": -0.32550880312919617, "rewards/margins": 0.7893013954162598, "rewards/rejected": -1.1148102283477783, "step": 4511 }, { "epoch": 0.52, "learning_rate": 1.461547465761442e-07, "logits/chosen": -2.8281402587890625, "logits/rejected": -2.964817762374878, "logps/chosen": -197.91180419921875, "logps/rejected": -158.55967712402344, "loss": 1.0826, "rewards/accuracies": 0.75, "rewards/chosen": -0.7154462933540344, "rewards/margins": 0.2448439598083496, "rewards/rejected": -0.9602901935577393, "step": 4512 }, { "epoch": 0.52, "learning_rate": 1.4611963010651995e-07, "logits/chosen": -2.135673761367798, "logits/rejected": -2.313724994659424, "logps/chosen": -292.45501708984375, "logps/rejected": -268.8369140625, "loss": 0.4157, "rewards/accuracies": 0.75, "rewards/chosen": -0.09686976671218872, "rewards/margins": 1.5096077919006348, "rewards/rejected": -1.6064776182174683, "step": 4513 }, { "epoch": 0.52, "learning_rate": 1.460845136368957e-07, "logits/chosen": -3.309363842010498, "logits/rejected": -3.078136920928955, "logps/chosen": -387.93377685546875, "logps/rejected": -275.83111572265625, "loss": 0.3709, "rewards/accuracies": 0.875, "rewards/chosen": 0.3221232295036316, "rewards/margins": 1.6704798936843872, "rewards/rejected": -1.3483566045761108, "step": 4514 }, { "epoch": 0.52, "learning_rate": 1.4604939716727145e-07, "logits/chosen": -2.7592804431915283, "logits/rejected": -2.7590973377227783, "logps/chosen": -225.7200469970703, "logps/rejected": -182.85997009277344, "loss": 0.5739, "rewards/accuracies": 0.625, "rewards/chosen": -0.7386026382446289, "rewards/margins": 0.6514223217964172, "rewards/rejected": -1.3900249004364014, "step": 4515 }, { "epoch": 0.52, "learning_rate": 1.4601428069764718e-07, "logits/chosen": -3.7803540229797363, "logits/rejected": -3.643894910812378, "logps/chosen": -183.2703094482422, "logps/rejected": -222.65489196777344, "loss": 0.4361, "rewards/accuracies": 0.75, "rewards/chosen": 0.16703426837921143, "rewards/margins": 1.3481113910675049, "rewards/rejected": -1.1810771226882935, "step": 4516 }, { "epoch": 0.52, "learning_rate": 1.4597916422802294e-07, "logits/chosen": -3.38935923576355, "logits/rejected": -3.526336669921875, "logps/chosen": -240.83563232421875, "logps/rejected": -247.01268005371094, "loss": 0.3558, "rewards/accuracies": 0.75, "rewards/chosen": 0.2620702087879181, "rewards/margins": 1.4285826683044434, "rewards/rejected": -1.1665124893188477, "step": 4517 }, { "epoch": 0.52, "learning_rate": 1.459440477583987e-07, "logits/chosen": -2.5997231006622314, "logits/rejected": -2.676978588104248, "logps/chosen": -381.77862548828125, "logps/rejected": -304.8307800292969, "loss": 0.2589, "rewards/accuracies": 0.875, "rewards/chosen": 0.6638932228088379, "rewards/margins": 2.397510051727295, "rewards/rejected": -1.7336170673370361, "step": 4518 }, { "epoch": 0.52, "learning_rate": 1.4590893128877442e-07, "logits/chosen": -3.4240500926971436, "logits/rejected": -3.1350722312927246, "logps/chosen": -394.4221496582031, "logps/rejected": -185.21954345703125, "loss": 0.5203, "rewards/accuracies": 0.75, "rewards/chosen": 0.15591832995414734, "rewards/margins": 1.2731294631958008, "rewards/rejected": -1.117211103439331, "step": 4519 }, { "epoch": 0.52, "learning_rate": 1.4587381481915017e-07, "logits/chosen": -3.7600042819976807, "logits/rejected": -3.584879159927368, "logps/chosen": -234.7393798828125, "logps/rejected": -302.9960632324219, "loss": 0.3496, "rewards/accuracies": 0.75, "rewards/chosen": -0.6580209732055664, "rewards/margins": 2.0400314331054688, "rewards/rejected": -2.698052406311035, "step": 4520 }, { "epoch": 0.52, "learning_rate": 1.4583869834952593e-07, "logits/chosen": -2.6463563442230225, "logits/rejected": -2.847277879714966, "logps/chosen": -376.23944091796875, "logps/rejected": -266.58392333984375, "loss": 0.1383, "rewards/accuracies": 1.0, "rewards/chosen": 0.8506337404251099, "rewards/margins": 2.9745781421661377, "rewards/rejected": -2.1239442825317383, "step": 4521 }, { "epoch": 0.52, "learning_rate": 1.4580358187990168e-07, "logits/chosen": -3.6484646797180176, "logits/rejected": -3.3016843795776367, "logps/chosen": -438.1533203125, "logps/rejected": -358.65374755859375, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": 0.6232179403305054, "rewards/margins": 3.3576555252075195, "rewards/rejected": -2.7344377040863037, "step": 4522 }, { "epoch": 0.52, "learning_rate": 1.457684654102774e-07, "logits/chosen": -3.1892309188842773, "logits/rejected": -3.211682081222534, "logps/chosen": -295.88555908203125, "logps/rejected": -279.38946533203125, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": -0.28768986463546753, "rewards/margins": 1.305981993675232, "rewards/rejected": -1.5936717987060547, "step": 4523 }, { "epoch": 0.52, "learning_rate": 1.4573334894065316e-07, "logits/chosen": -2.8580026626586914, "logits/rejected": -3.077594518661499, "logps/chosen": -224.65945434570312, "logps/rejected": -124.761474609375, "loss": 0.6244, "rewards/accuracies": 0.625, "rewards/chosen": -0.017807617783546448, "rewards/margins": 0.7208440899848938, "rewards/rejected": -0.738651692867279, "step": 4524 }, { "epoch": 0.52, "learning_rate": 1.456982324710289e-07, "logits/chosen": -3.5685439109802246, "logits/rejected": -3.3410568237304688, "logps/chosen": -409.82635498046875, "logps/rejected": -305.61322021484375, "loss": 0.2554, "rewards/accuracies": 1.0, "rewards/chosen": 0.4066388010978699, "rewards/margins": 1.5497736930847168, "rewards/rejected": -1.1431347131729126, "step": 4525 }, { "epoch": 0.52, "learning_rate": 1.4566311600140467e-07, "logits/chosen": -3.1090521812438965, "logits/rejected": -3.2673966884613037, "logps/chosen": -220.19656372070312, "logps/rejected": -196.0310516357422, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": -0.5848056077957153, "rewards/margins": 1.1758410930633545, "rewards/rejected": -1.7606467008590698, "step": 4526 }, { "epoch": 0.52, "learning_rate": 1.456279995317804e-07, "logits/chosen": -3.439317464828491, "logits/rejected": -3.5200862884521484, "logps/chosen": -448.24847412109375, "logps/rejected": -566.74658203125, "loss": 0.317, "rewards/accuracies": 0.875, "rewards/chosen": 0.21919900178909302, "rewards/margins": 1.9185432195663452, "rewards/rejected": -1.699344277381897, "step": 4527 }, { "epoch": 0.52, "learning_rate": 1.4559288306215615e-07, "logits/chosen": -3.617074966430664, "logits/rejected": -3.689352512359619, "logps/chosen": -145.47332763671875, "logps/rejected": -119.52456665039062, "loss": 0.3715, "rewards/accuracies": 0.75, "rewards/chosen": 0.25587666034698486, "rewards/margins": 1.603288173675537, "rewards/rejected": -1.3474115133285522, "step": 4528 }, { "epoch": 0.52, "learning_rate": 1.4555776659253188e-07, "logits/chosen": -3.689966917037964, "logits/rejected": -4.051967620849609, "logps/chosen": -85.65380096435547, "logps/rejected": -129.17864990234375, "loss": 0.547, "rewards/accuracies": 0.5, "rewards/chosen": -0.2323906421661377, "rewards/margins": 1.0100207328796387, "rewards/rejected": -1.242411494255066, "step": 4529 }, { "epoch": 0.52, "learning_rate": 1.4552265012290763e-07, "logits/chosen": -3.255544662475586, "logits/rejected": -2.908787250518799, "logps/chosen": -314.61328125, "logps/rejected": -244.54473876953125, "loss": 0.4696, "rewards/accuracies": 0.875, "rewards/chosen": 0.22261370718479156, "rewards/margins": 2.0103931427001953, "rewards/rejected": -1.7877795696258545, "step": 4530 }, { "epoch": 0.52, "learning_rate": 1.4548753365328338e-07, "logits/chosen": -3.3215649127960205, "logits/rejected": -2.872685432434082, "logps/chosen": -328.25787353515625, "logps/rejected": -197.47457885742188, "loss": 0.3597, "rewards/accuracies": 0.75, "rewards/chosen": 0.298471063375473, "rewards/margins": 1.280191421508789, "rewards/rejected": -0.9817203283309937, "step": 4531 }, { "epoch": 0.52, "learning_rate": 1.4545241718365914e-07, "logits/chosen": -2.810771942138672, "logits/rejected": -2.8076107501983643, "logps/chosen": -189.19036865234375, "logps/rejected": -187.88670349121094, "loss": 0.7244, "rewards/accuracies": 0.875, "rewards/chosen": -0.7424294352531433, "rewards/margins": 0.777534544467926, "rewards/rejected": -1.5199639797210693, "step": 4532 }, { "epoch": 0.52, "learning_rate": 1.4541730071403487e-07, "logits/chosen": -3.2213761806488037, "logits/rejected": -3.234488010406494, "logps/chosen": -157.89962768554688, "logps/rejected": -157.41455078125, "loss": 0.2002, "rewards/accuracies": 1.0, "rewards/chosen": -0.07875081896781921, "rewards/margins": 1.9244698286056519, "rewards/rejected": -2.003220558166504, "step": 4533 }, { "epoch": 0.52, "learning_rate": 1.4538218424441062e-07, "logits/chosen": -3.7212352752685547, "logits/rejected": -3.7019686698913574, "logps/chosen": -154.13929748535156, "logps/rejected": -192.4016876220703, "loss": 0.3152, "rewards/accuracies": 0.875, "rewards/chosen": -0.2624971866607666, "rewards/margins": 3.044203281402588, "rewards/rejected": -3.3067007064819336, "step": 4534 }, { "epoch": 0.52, "learning_rate": 1.4534706777478637e-07, "logits/chosen": -3.3896772861480713, "logits/rejected": -3.346013069152832, "logps/chosen": -232.71644592285156, "logps/rejected": -201.4299774169922, "loss": 0.2177, "rewards/accuracies": 1.0, "rewards/chosen": -0.11186444759368896, "rewards/margins": 1.7746059894561768, "rewards/rejected": -1.8864705562591553, "step": 4535 }, { "epoch": 0.52, "learning_rate": 1.453119513051621e-07, "logits/chosen": -3.2645771503448486, "logits/rejected": -3.2260966300964355, "logps/chosen": -170.376220703125, "logps/rejected": -221.6679229736328, "loss": 0.2341, "rewards/accuracies": 0.875, "rewards/chosen": -0.18128237128257751, "rewards/margins": 2.3960623741149902, "rewards/rejected": -2.5773448944091797, "step": 4536 }, { "epoch": 0.52, "learning_rate": 1.4527683483553785e-07, "logits/chosen": -3.1881306171417236, "logits/rejected": -3.1731247901916504, "logps/chosen": -212.89830017089844, "logps/rejected": -194.3070068359375, "loss": 0.4836, "rewards/accuracies": 0.75, "rewards/chosen": -0.24336591362953186, "rewards/margins": 1.2229398488998413, "rewards/rejected": -1.4663057327270508, "step": 4537 }, { "epoch": 0.52, "learning_rate": 1.452417183659136e-07, "logits/chosen": -3.5487096309661865, "logits/rejected": -3.2463550567626953, "logps/chosen": -270.8427734375, "logps/rejected": -182.46804809570312, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": 0.03521338105201721, "rewards/margins": 1.657698154449463, "rewards/rejected": -1.6224846839904785, "step": 4538 }, { "epoch": 0.52, "learning_rate": 1.4520660189628936e-07, "logits/chosen": -3.2083234786987305, "logits/rejected": -3.591989517211914, "logps/chosen": -116.93147277832031, "logps/rejected": -272.55462646484375, "loss": 0.578, "rewards/accuracies": 0.75, "rewards/chosen": -0.035640276968479156, "rewards/margins": 1.617920160293579, "rewards/rejected": -1.6535604000091553, "step": 4539 }, { "epoch": 0.52, "learning_rate": 1.451714854266651e-07, "logits/chosen": -3.0910043716430664, "logits/rejected": -2.9869306087493896, "logps/chosen": -203.59634399414062, "logps/rejected": -205.57298278808594, "loss": 0.2441, "rewards/accuracies": 1.0, "rewards/chosen": 0.08964620530605316, "rewards/margins": 1.8415952920913696, "rewards/rejected": -1.7519490718841553, "step": 4540 }, { "epoch": 0.52, "learning_rate": 1.4513636895704084e-07, "logits/chosen": -3.8237862586975098, "logits/rejected": -3.81353497505188, "logps/chosen": -175.53274536132812, "logps/rejected": -164.8250274658203, "loss": 0.4683, "rewards/accuracies": 0.75, "rewards/chosen": -0.08865071833133698, "rewards/margins": 1.1709126234054565, "rewards/rejected": -1.2595632076263428, "step": 4541 }, { "epoch": 0.52, "learning_rate": 1.451012524874166e-07, "logits/chosen": -3.5613174438476562, "logits/rejected": -3.688434362411499, "logps/chosen": -271.90313720703125, "logps/rejected": -139.69740295410156, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": 0.682332456111908, "rewards/margins": 2.541240930557251, "rewards/rejected": -1.8589084148406982, "step": 4542 }, { "epoch": 0.52, "learning_rate": 1.4506613601779235e-07, "logits/chosen": -3.673847198486328, "logits/rejected": -3.4159200191497803, "logps/chosen": -213.395751953125, "logps/rejected": -208.93601989746094, "loss": 0.677, "rewards/accuracies": 0.625, "rewards/chosen": -0.38980770111083984, "rewards/margins": 0.6813926100730896, "rewards/rejected": -1.0712002515792847, "step": 4543 }, { "epoch": 0.52, "learning_rate": 1.4503101954816808e-07, "logits/chosen": -2.6312665939331055, "logits/rejected": -2.779625654220581, "logps/chosen": -284.84619140625, "logps/rejected": -395.837890625, "loss": 0.3948, "rewards/accuracies": 0.875, "rewards/chosen": 0.31688806414604187, "rewards/margins": 1.2289832830429077, "rewards/rejected": -0.9120951890945435, "step": 4544 }, { "epoch": 0.52, "learning_rate": 1.4499590307854383e-07, "logits/chosen": -3.376478672027588, "logits/rejected": -3.656719923019409, "logps/chosen": -119.56896209716797, "logps/rejected": -184.76536560058594, "loss": 0.4139, "rewards/accuracies": 0.625, "rewards/chosen": -0.2122991383075714, "rewards/margins": 1.583847165107727, "rewards/rejected": -1.796146273612976, "step": 4545 }, { "epoch": 0.52, "learning_rate": 1.4496078660891959e-07, "logits/chosen": -3.5364222526550293, "logits/rejected": -3.3374640941619873, "logps/chosen": -111.90615844726562, "logps/rejected": -133.36312866210938, "loss": 0.3972, "rewards/accuracies": 0.75, "rewards/chosen": -0.13662220537662506, "rewards/margins": 1.194567322731018, "rewards/rejected": -1.3311896324157715, "step": 4546 }, { "epoch": 0.52, "learning_rate": 1.4492567013929531e-07, "logits/chosen": -2.3638458251953125, "logits/rejected": -2.5904104709625244, "logps/chosen": -262.13629150390625, "logps/rejected": -282.65753173828125, "loss": 0.407, "rewards/accuracies": 0.75, "rewards/chosen": -0.04864295572042465, "rewards/margins": 1.4064637422561646, "rewards/rejected": -1.4551066160202026, "step": 4547 }, { "epoch": 0.52, "learning_rate": 1.4489055366967107e-07, "logits/chosen": -3.233232021331787, "logits/rejected": -3.1042211055755615, "logps/chosen": -270.427734375, "logps/rejected": -164.15811157226562, "loss": 0.5005, "rewards/accuracies": 0.875, "rewards/chosen": -0.098228819668293, "rewards/margins": 1.1732428073883057, "rewards/rejected": -1.2714717388153076, "step": 4548 }, { "epoch": 0.52, "learning_rate": 1.4485543720004682e-07, "logits/chosen": -3.498145341873169, "logits/rejected": -3.727795124053955, "logps/chosen": -275.287353515625, "logps/rejected": -202.54971313476562, "loss": 0.6072, "rewards/accuracies": 0.625, "rewards/chosen": -0.8178858160972595, "rewards/margins": 0.9077301621437073, "rewards/rejected": -1.7256159782409668, "step": 4549 }, { "epoch": 0.52, "learning_rate": 1.4482032073042258e-07, "logits/chosen": -2.4420251846313477, "logits/rejected": -2.5405540466308594, "logps/chosen": -374.2143859863281, "logps/rejected": -330.7633361816406, "loss": 0.4153, "rewards/accuracies": 0.75, "rewards/chosen": 0.1848951131105423, "rewards/margins": 2.0091989040374756, "rewards/rejected": -1.8243037462234497, "step": 4550 }, { "epoch": 0.52, "learning_rate": 1.447852042607983e-07, "logits/chosen": -2.64052677154541, "logits/rejected": -2.637890338897705, "logps/chosen": -272.3152160644531, "logps/rejected": -247.793701171875, "loss": 0.3195, "rewards/accuracies": 0.875, "rewards/chosen": 0.38582882285118103, "rewards/margins": 1.779038906097412, "rewards/rejected": -1.3932100534439087, "step": 4551 }, { "epoch": 0.52, "learning_rate": 1.4475008779117406e-07, "logits/chosen": -4.040778160095215, "logits/rejected": -3.296001434326172, "logps/chosen": -294.9337158203125, "logps/rejected": -199.19500732421875, "loss": 0.2015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8824362754821777, "rewards/margins": 2.226883888244629, "rewards/rejected": -3.1093201637268066, "step": 4552 }, { "epoch": 0.52, "learning_rate": 1.4471497132154978e-07, "logits/chosen": -3.4529309272766113, "logits/rejected": -3.3314085006713867, "logps/chosen": -216.83767700195312, "logps/rejected": -162.25607299804688, "loss": 0.401, "rewards/accuracies": 0.75, "rewards/chosen": 0.0001845136284828186, "rewards/margins": 1.6669538021087646, "rewards/rejected": -1.6667691469192505, "step": 4553 }, { "epoch": 0.52, "learning_rate": 1.4467985485192556e-07, "logits/chosen": -2.461735963821411, "logits/rejected": -2.4717156887054443, "logps/chosen": -160.6231231689453, "logps/rejected": -191.66448974609375, "loss": 0.3535, "rewards/accuracies": 0.75, "rewards/chosen": -0.35553234815597534, "rewards/margins": 2.0131242275238037, "rewards/rejected": -2.3686563968658447, "step": 4554 }, { "epoch": 0.53, "learning_rate": 1.446447383823013e-07, "logits/chosen": -3.109954833984375, "logits/rejected": -3.0926620960235596, "logps/chosen": -305.19732666015625, "logps/rejected": -229.22430419921875, "loss": 0.2968, "rewards/accuracies": 1.0, "rewards/chosen": 0.07442638278007507, "rewards/margins": 1.4922597408294678, "rewards/rejected": -1.4178333282470703, "step": 4555 }, { "epoch": 0.53, "learning_rate": 1.4460962191267705e-07, "logits/chosen": -2.54498291015625, "logits/rejected": -2.602360486984253, "logps/chosen": -474.4638671875, "logps/rejected": -347.944091796875, "loss": 0.2012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2334541380405426, "rewards/margins": 2.7988123893737793, "rewards/rejected": -3.032266616821289, "step": 4556 }, { "epoch": 0.53, "learning_rate": 1.4457450544305277e-07, "logits/chosen": -2.531528949737549, "logits/rejected": -2.7545177936553955, "logps/chosen": -372.0047607421875, "logps/rejected": -424.03729248046875, "loss": 0.6357, "rewards/accuracies": 0.75, "rewards/chosen": -0.085598424077034, "rewards/margins": 0.9958369731903076, "rewards/rejected": -1.0814354419708252, "step": 4557 }, { "epoch": 0.53, "learning_rate": 1.4453938897342853e-07, "logits/chosen": -2.9523301124572754, "logits/rejected": -2.9954721927642822, "logps/chosen": -139.28836059570312, "logps/rejected": -154.98193359375, "loss": 0.5023, "rewards/accuracies": 0.5, "rewards/chosen": -0.21690887212753296, "rewards/margins": 1.1869704723358154, "rewards/rejected": -1.4038792848587036, "step": 4558 }, { "epoch": 0.53, "learning_rate": 1.4450427250380428e-07, "logits/chosen": -3.3322062492370605, "logits/rejected": -3.3333044052124023, "logps/chosen": -196.14761352539062, "logps/rejected": -198.09396362304688, "loss": 0.5792, "rewards/accuracies": 0.75, "rewards/chosen": -0.45698341727256775, "rewards/margins": 1.8107103109359741, "rewards/rejected": -2.267693519592285, "step": 4559 }, { "epoch": 0.53, "learning_rate": 1.4446915603418003e-07, "logits/chosen": -3.0984978675842285, "logits/rejected": -2.8150222301483154, "logps/chosen": -257.37353515625, "logps/rejected": -262.20587158203125, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": 0.7484826445579529, "rewards/margins": 3.245074510574341, "rewards/rejected": -2.496591806411743, "step": 4560 }, { "epoch": 0.53, "learning_rate": 1.4443403956455576e-07, "logits/chosen": -3.2418785095214844, "logits/rejected": -3.5650794506073, "logps/chosen": -180.28765869140625, "logps/rejected": -196.23568725585938, "loss": 0.3169, "rewards/accuracies": 1.0, "rewards/chosen": -0.01657930016517639, "rewards/margins": 1.5889216661453247, "rewards/rejected": -1.6055009365081787, "step": 4561 }, { "epoch": 0.53, "learning_rate": 1.4439892309493152e-07, "logits/chosen": -3.2095441818237305, "logits/rejected": -3.250314950942993, "logps/chosen": -100.3193359375, "logps/rejected": -209.22415161132812, "loss": 0.1695, "rewards/accuracies": 1.0, "rewards/chosen": 0.36452147364616394, "rewards/margins": 2.124643564224243, "rewards/rejected": -1.7601221799850464, "step": 4562 }, { "epoch": 0.53, "learning_rate": 1.4436380662530727e-07, "logits/chosen": -3.7570927143096924, "logits/rejected": -3.773345470428467, "logps/chosen": -350.0870361328125, "logps/rejected": -317.1756896972656, "loss": 0.4121, "rewards/accuracies": 0.75, "rewards/chosen": 0.02452746033668518, "rewards/margins": 1.4482901096343994, "rewards/rejected": -1.4237627983093262, "step": 4563 }, { "epoch": 0.53, "learning_rate": 1.44328690155683e-07, "logits/chosen": -2.7558796405792236, "logits/rejected": -2.824437141418457, "logps/chosen": -253.21527099609375, "logps/rejected": -287.856201171875, "loss": 0.2744, "rewards/accuracies": 1.0, "rewards/chosen": -0.02722858637571335, "rewards/margins": 3.4359841346740723, "rewards/rejected": -3.463212728500366, "step": 4564 }, { "epoch": 0.53, "learning_rate": 1.4429357368605875e-07, "logits/chosen": -3.440643310546875, "logits/rejected": -3.2163705825805664, "logps/chosen": -397.6063232421875, "logps/rejected": -314.3331298828125, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": 0.6726202964782715, "rewards/margins": 3.234943151473999, "rewards/rejected": -2.5623230934143066, "step": 4565 }, { "epoch": 0.53, "learning_rate": 1.442584572164345e-07, "logits/chosen": -2.8255410194396973, "logits/rejected": -2.8346610069274902, "logps/chosen": -173.08009338378906, "logps/rejected": -190.33807373046875, "loss": 0.4175, "rewards/accuracies": 0.875, "rewards/chosen": 0.1201213076710701, "rewards/margins": 1.3137295246124268, "rewards/rejected": -1.193608045578003, "step": 4566 }, { "epoch": 0.53, "learning_rate": 1.4422334074681026e-07, "logits/chosen": -3.2480435371398926, "logits/rejected": -3.2287280559539795, "logps/chosen": -302.8789978027344, "logps/rejected": -283.169189453125, "loss": 0.4636, "rewards/accuracies": 0.75, "rewards/chosen": -0.3232758641242981, "rewards/margins": 1.1039314270019531, "rewards/rejected": -1.4272072315216064, "step": 4567 }, { "epoch": 0.53, "learning_rate": 1.4418822427718599e-07, "logits/chosen": -3.321974277496338, "logits/rejected": -3.1685423851013184, "logps/chosen": -373.6947021484375, "logps/rejected": -312.84967041015625, "loss": 0.2023, "rewards/accuracies": 1.0, "rewards/chosen": 0.4350305199623108, "rewards/margins": 2.217876672744751, "rewards/rejected": -1.7828460931777954, "step": 4568 }, { "epoch": 0.53, "learning_rate": 1.4415310780756174e-07, "logits/chosen": -3.5175695419311523, "logits/rejected": -3.632200241088867, "logps/chosen": -313.5005798339844, "logps/rejected": -355.1815185546875, "loss": 0.2374, "rewards/accuracies": 0.875, "rewards/chosen": 0.09643249213695526, "rewards/margins": 2.1383137702941895, "rewards/rejected": -2.0418810844421387, "step": 4569 }, { "epoch": 0.53, "learning_rate": 1.4411799133793747e-07, "logits/chosen": -3.549882411956787, "logits/rejected": -3.3712549209594727, "logps/chosen": -154.60055541992188, "logps/rejected": -270.7834777832031, "loss": 0.6021, "rewards/accuracies": 0.5, "rewards/chosen": -0.4910079538822174, "rewards/margins": 0.645799994468689, "rewards/rejected": -1.136807918548584, "step": 4570 }, { "epoch": 0.53, "learning_rate": 1.4408287486831325e-07, "logits/chosen": -3.262845039367676, "logits/rejected": -3.05125093460083, "logps/chosen": -215.6374053955078, "logps/rejected": -191.34481811523438, "loss": 0.3327, "rewards/accuracies": 0.875, "rewards/chosen": -0.45906418561935425, "rewards/margins": 1.6038028001785278, "rewards/rejected": -2.0628671646118164, "step": 4571 }, { "epoch": 0.53, "learning_rate": 1.4404775839868897e-07, "logits/chosen": -2.6186609268188477, "logits/rejected": -2.422074317932129, "logps/chosen": -258.3731689453125, "logps/rejected": -185.49208068847656, "loss": 0.9902, "rewards/accuracies": 0.375, "rewards/chosen": -1.0658153295516968, "rewards/margins": 0.2729703187942505, "rewards/rejected": -1.3387857675552368, "step": 4572 }, { "epoch": 0.53, "learning_rate": 1.4401264192906473e-07, "logits/chosen": -3.162992000579834, "logits/rejected": -3.011418342590332, "logps/chosen": -177.0764923095703, "logps/rejected": -239.6697998046875, "loss": 0.847, "rewards/accuracies": 0.5, "rewards/chosen": 0.23454564809799194, "rewards/margins": 0.42406052350997925, "rewards/rejected": -0.1895148605108261, "step": 4573 }, { "epoch": 0.53, "learning_rate": 1.4397752545944046e-07, "logits/chosen": -3.091902732849121, "logits/rejected": -3.0524702072143555, "logps/chosen": -313.5158386230469, "logps/rejected": -329.92767333984375, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": 0.08120383322238922, "rewards/margins": 2.048280715942383, "rewards/rejected": -1.9670766592025757, "step": 4574 }, { "epoch": 0.53, "learning_rate": 1.439424089898162e-07, "logits/chosen": -3.274196147918701, "logits/rejected": -2.5410547256469727, "logps/chosen": -182.36268615722656, "logps/rejected": -168.9410858154297, "loss": 0.5924, "rewards/accuracies": 0.75, "rewards/chosen": -0.14333055913448334, "rewards/margins": 1.574354648590088, "rewards/rejected": -1.7176851034164429, "step": 4575 }, { "epoch": 0.53, "learning_rate": 1.4390729252019196e-07, "logits/chosen": -2.6212213039398193, "logits/rejected": -2.731314182281494, "logps/chosen": -518.8555297851562, "logps/rejected": -434.15521240234375, "loss": 0.5506, "rewards/accuracies": 0.625, "rewards/chosen": -0.8988786935806274, "rewards/margins": 1.0927289724349976, "rewards/rejected": -1.991607666015625, "step": 4576 }, { "epoch": 0.53, "learning_rate": 1.4387217605056772e-07, "logits/chosen": -3.500058174133301, "logits/rejected": -3.516181230545044, "logps/chosen": -116.54816436767578, "logps/rejected": -145.6199188232422, "loss": 0.286, "rewards/accuracies": 0.875, "rewards/chosen": 0.023239217698574066, "rewards/margins": 1.7005071640014648, "rewards/rejected": -1.6772680282592773, "step": 4577 }, { "epoch": 0.53, "learning_rate": 1.4383705958094344e-07, "logits/chosen": -2.840970993041992, "logits/rejected": -2.6827032566070557, "logps/chosen": -136.5832977294922, "logps/rejected": -180.3238067626953, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": -0.38811808824539185, "rewards/margins": 1.4643056392669678, "rewards/rejected": -1.8524236679077148, "step": 4578 }, { "epoch": 0.53, "learning_rate": 1.438019431113192e-07, "logits/chosen": -3.246171474456787, "logits/rejected": -3.0562143325805664, "logps/chosen": -273.30572509765625, "logps/rejected": -195.18002319335938, "loss": 0.3046, "rewards/accuracies": 0.75, "rewards/chosen": -0.06066463142633438, "rewards/margins": 1.9875521659851074, "rewards/rejected": -2.0482168197631836, "step": 4579 }, { "epoch": 0.53, "learning_rate": 1.4376682664169495e-07, "logits/chosen": -2.9094042778015137, "logits/rejected": -2.979661703109741, "logps/chosen": -236.27906799316406, "logps/rejected": -248.86660766601562, "loss": 0.6234, "rewards/accuracies": 0.75, "rewards/chosen": -0.13385868072509766, "rewards/margins": 1.3448107242584229, "rewards/rejected": -1.4786694049835205, "step": 4580 }, { "epoch": 0.53, "learning_rate": 1.4373171017207068e-07, "logits/chosen": -3.8400301933288574, "logits/rejected": -3.7674267292022705, "logps/chosen": -358.4031982421875, "logps/rejected": -254.9463653564453, "loss": 0.7624, "rewards/accuracies": 0.625, "rewards/chosen": -0.8132634162902832, "rewards/margins": 0.7580273151397705, "rewards/rejected": -1.5712908506393433, "step": 4581 }, { "epoch": 0.53, "learning_rate": 1.4369659370244643e-07, "logits/chosen": -2.625728130340576, "logits/rejected": -2.787111282348633, "logps/chosen": -252.4321746826172, "logps/rejected": -229.53628540039062, "loss": 0.5251, "rewards/accuracies": 0.625, "rewards/chosen": -0.31606990098953247, "rewards/margins": 1.0829651355743408, "rewards/rejected": -1.399035096168518, "step": 4582 }, { "epoch": 0.53, "learning_rate": 1.436614772328222e-07, "logits/chosen": -2.91627836227417, "logits/rejected": -2.7716126441955566, "logps/chosen": -336.0606689453125, "logps/rejected": -376.5575866699219, "loss": 0.5416, "rewards/accuracies": 0.75, "rewards/chosen": -0.7730026245117188, "rewards/margins": 1.593205213546753, "rewards/rejected": -2.3662075996398926, "step": 4583 }, { "epoch": 0.53, "learning_rate": 1.4362636076319794e-07, "logits/chosen": -3.792706251144409, "logits/rejected": -3.4713611602783203, "logps/chosen": -439.90863037109375, "logps/rejected": -489.5748291015625, "loss": 0.2277, "rewards/accuracies": 1.0, "rewards/chosen": -0.05380548909306526, "rewards/margins": 3.1306755542755127, "rewards/rejected": -3.184481143951416, "step": 4584 }, { "epoch": 0.53, "learning_rate": 1.4359124429357367e-07, "logits/chosen": -3.6243393421173096, "logits/rejected": -3.4488272666931152, "logps/chosen": -234.18824768066406, "logps/rejected": -195.2311248779297, "loss": 0.382, "rewards/accuracies": 0.875, "rewards/chosen": -0.2825080454349518, "rewards/margins": 1.0459206104278564, "rewards/rejected": -1.3284286260604858, "step": 4585 }, { "epoch": 0.53, "learning_rate": 1.4355612782394942e-07, "logits/chosen": -3.2642922401428223, "logits/rejected": -3.262443780899048, "logps/chosen": -342.9779052734375, "logps/rejected": -301.19854736328125, "loss": 0.3956, "rewards/accuracies": 0.75, "rewards/chosen": -0.21773774921894073, "rewards/margins": 1.4630885124206543, "rewards/rejected": -1.6808264255523682, "step": 4586 }, { "epoch": 0.53, "learning_rate": 1.4352101135432518e-07, "logits/chosen": -3.0994157791137695, "logits/rejected": -2.7915172576904297, "logps/chosen": -433.4837646484375, "logps/rejected": -281.1668701171875, "loss": 0.2921, "rewards/accuracies": 0.875, "rewards/chosen": 0.5110697746276855, "rewards/margins": 1.85420823097229, "rewards/rejected": -1.3431384563446045, "step": 4587 }, { "epoch": 0.53, "learning_rate": 1.4348589488470093e-07, "logits/chosen": -2.4662575721740723, "logits/rejected": -2.4495491981506348, "logps/chosen": -297.8823547363281, "logps/rejected": -392.26434326171875, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 0.6297708749771118, "rewards/margins": 3.1127095222473145, "rewards/rejected": -2.482938528060913, "step": 4588 }, { "epoch": 0.53, "learning_rate": 1.4345077841507666e-07, "logits/chosen": -3.109022617340088, "logits/rejected": -2.682875156402588, "logps/chosen": -261.17578125, "logps/rejected": -200.8707275390625, "loss": 0.4731, "rewards/accuracies": 0.75, "rewards/chosen": -0.16605547070503235, "rewards/margins": 1.8451519012451172, "rewards/rejected": -2.011207342147827, "step": 4589 }, { "epoch": 0.53, "learning_rate": 1.434156619454524e-07, "logits/chosen": -3.6277854442596436, "logits/rejected": -3.4555397033691406, "logps/chosen": -251.6851043701172, "logps/rejected": -263.46002197265625, "loss": 0.3772, "rewards/accuracies": 0.875, "rewards/chosen": -0.504522979259491, "rewards/margins": 1.3603379726409912, "rewards/rejected": -1.864861011505127, "step": 4590 }, { "epoch": 0.53, "learning_rate": 1.4338054547582817e-07, "logits/chosen": -4.043933868408203, "logits/rejected": -3.6875576972961426, "logps/chosen": -299.49560546875, "logps/rejected": -191.56935119628906, "loss": 0.6094, "rewards/accuracies": 0.75, "rewards/chosen": -0.3058225214481354, "rewards/margins": 0.9476535320281982, "rewards/rejected": -1.2534761428833008, "step": 4591 }, { "epoch": 0.53, "learning_rate": 1.433454290062039e-07, "logits/chosen": -3.690720558166504, "logits/rejected": -3.22398042678833, "logps/chosen": -191.22264099121094, "logps/rejected": -139.98681640625, "loss": 0.473, "rewards/accuracies": 0.625, "rewards/chosen": 0.10526669770479202, "rewards/margins": 1.5428450107574463, "rewards/rejected": -1.4375783205032349, "step": 4592 }, { "epoch": 0.53, "learning_rate": 1.4331031253657965e-07, "logits/chosen": -2.88511323928833, "logits/rejected": -3.0347137451171875, "logps/chosen": -109.47860717773438, "logps/rejected": -131.4563446044922, "loss": 0.3728, "rewards/accuracies": 0.75, "rewards/chosen": -0.36035746335983276, "rewards/margins": 1.3290297985076904, "rewards/rejected": -1.689387321472168, "step": 4593 }, { "epoch": 0.53, "learning_rate": 1.432751960669554e-07, "logits/chosen": -2.8215506076812744, "logits/rejected": -2.836158514022827, "logps/chosen": -338.61968994140625, "logps/rejected": -487.50244140625, "loss": 0.345, "rewards/accuracies": 0.875, "rewards/chosen": 0.3590484857559204, "rewards/margins": 2.185894012451172, "rewards/rejected": -1.826845407485962, "step": 4594 }, { "epoch": 0.53, "learning_rate": 1.4324007959733115e-07, "logits/chosen": -2.957360029220581, "logits/rejected": -2.9505960941314697, "logps/chosen": -284.9811706542969, "logps/rejected": -312.8219299316406, "loss": 0.2696, "rewards/accuracies": 1.0, "rewards/chosen": 0.11642752587795258, "rewards/margins": 1.5495460033416748, "rewards/rejected": -1.4331185817718506, "step": 4595 }, { "epoch": 0.53, "learning_rate": 1.4320496312770688e-07, "logits/chosen": -3.1995177268981934, "logits/rejected": -3.34700345993042, "logps/chosen": -114.85945892333984, "logps/rejected": -203.09185791015625, "loss": 0.244, "rewards/accuracies": 0.875, "rewards/chosen": 0.165593221783638, "rewards/margins": 2.6160504817962646, "rewards/rejected": -2.4504568576812744, "step": 4596 }, { "epoch": 0.53, "learning_rate": 1.4316984665808264e-07, "logits/chosen": -2.668905258178711, "logits/rejected": -2.4016284942626953, "logps/chosen": -210.7563018798828, "logps/rejected": -264.86126708984375, "loss": 0.363, "rewards/accuracies": 0.625, "rewards/chosen": -0.4332781434059143, "rewards/margins": 1.7063796520233154, "rewards/rejected": -2.139657735824585, "step": 4597 }, { "epoch": 0.53, "learning_rate": 1.4313473018845836e-07, "logits/chosen": -3.028956413269043, "logits/rejected": -3.2863101959228516, "logps/chosen": -400.9277038574219, "logps/rejected": -171.89358520507812, "loss": 0.6773, "rewards/accuracies": 0.5, "rewards/chosen": -0.1865827590227127, "rewards/margins": 1.0037997961044312, "rewards/rejected": -1.1903825998306274, "step": 4598 }, { "epoch": 0.53, "learning_rate": 1.4309961371883414e-07, "logits/chosen": -3.1716747283935547, "logits/rejected": -3.069523811340332, "logps/chosen": -265.24066162109375, "logps/rejected": -233.5712127685547, "loss": 0.3198, "rewards/accuracies": 0.75, "rewards/chosen": 0.09300629049539566, "rewards/margins": 2.3481945991516113, "rewards/rejected": -2.25518798828125, "step": 4599 }, { "epoch": 0.53, "learning_rate": 1.4306449724920987e-07, "logits/chosen": -3.2458336353302, "logits/rejected": -3.231771945953369, "logps/chosen": -184.26431274414062, "logps/rejected": -215.38148498535156, "loss": 0.2037, "rewards/accuracies": 1.0, "rewards/chosen": 0.11146067082881927, "rewards/margins": 2.8260674476623535, "rewards/rejected": -2.714606761932373, "step": 4600 }, { "epoch": 0.53, "learning_rate": 1.4302938077958562e-07, "logits/chosen": -3.5882365703582764, "logits/rejected": -3.5013482570648193, "logps/chosen": -207.10707092285156, "logps/rejected": -252.5864715576172, "loss": 0.2717, "rewards/accuracies": 0.875, "rewards/chosen": 0.11974400281906128, "rewards/margins": 1.7795013189315796, "rewards/rejected": -1.659757137298584, "step": 4601 }, { "epoch": 0.53, "learning_rate": 1.4299426430996135e-07, "logits/chosen": -3.222297191619873, "logits/rejected": -3.1981773376464844, "logps/chosen": -175.72015380859375, "logps/rejected": -137.79771423339844, "loss": 0.7187, "rewards/accuracies": 0.625, "rewards/chosen": -0.4052913784980774, "rewards/margins": 0.5377423763275146, "rewards/rejected": -0.943033754825592, "step": 4602 }, { "epoch": 0.53, "learning_rate": 1.429591478403371e-07, "logits/chosen": -3.5260543823242188, "logits/rejected": -3.3549399375915527, "logps/chosen": -399.3951416015625, "logps/rejected": -342.8174133300781, "loss": 0.4493, "rewards/accuracies": 0.75, "rewards/chosen": -0.30068516731262207, "rewards/margins": 0.9108918905258179, "rewards/rejected": -1.21157705783844, "step": 4603 }, { "epoch": 0.53, "learning_rate": 1.4292403137071286e-07, "logits/chosen": -3.377521276473999, "logits/rejected": -3.3984177112579346, "logps/chosen": -287.64056396484375, "logps/rejected": -319.78948974609375, "loss": 0.3835, "rewards/accuracies": 0.75, "rewards/chosen": -0.026783868670463562, "rewards/margins": 1.526104211807251, "rewards/rejected": -1.552888035774231, "step": 4604 }, { "epoch": 0.53, "learning_rate": 1.4288891490108861e-07, "logits/chosen": -2.391757011413574, "logits/rejected": -2.209441661834717, "logps/chosen": -65.5755386352539, "logps/rejected": -131.6504669189453, "loss": 0.3651, "rewards/accuracies": 1.0, "rewards/chosen": 0.11203740537166595, "rewards/margins": 0.9377835988998413, "rewards/rejected": -0.8257461786270142, "step": 4605 }, { "epoch": 0.53, "learning_rate": 1.4285379843146434e-07, "logits/chosen": -3.7533528804779053, "logits/rejected": -4.035670280456543, "logps/chosen": -314.9356994628906, "logps/rejected": -307.5156555175781, "loss": 0.3608, "rewards/accuracies": 0.875, "rewards/chosen": -0.2547226548194885, "rewards/margins": 1.7278378009796143, "rewards/rejected": -1.982560396194458, "step": 4606 }, { "epoch": 0.53, "learning_rate": 1.428186819618401e-07, "logits/chosen": -4.130621910095215, "logits/rejected": -3.7439889907836914, "logps/chosen": -272.05084228515625, "logps/rejected": -244.03213500976562, "loss": 0.3218, "rewards/accuracies": 0.875, "rewards/chosen": -0.021763205528259277, "rewards/margins": 2.253709077835083, "rewards/rejected": -2.2754721641540527, "step": 4607 }, { "epoch": 0.53, "learning_rate": 1.4278356549221585e-07, "logits/chosen": -2.8559913635253906, "logits/rejected": -3.159079074859619, "logps/chosen": -226.8187255859375, "logps/rejected": -211.24705505371094, "loss": 0.4086, "rewards/accuracies": 0.875, "rewards/chosen": 0.054330699145793915, "rewards/margins": 0.9295872449874878, "rewards/rejected": -0.8752565383911133, "step": 4608 }, { "epoch": 0.53, "learning_rate": 1.4274844902259158e-07, "logits/chosen": -2.7460527420043945, "logits/rejected": -2.7483699321746826, "logps/chosen": -335.0164489746094, "logps/rejected": -196.95849609375, "loss": 0.4616, "rewards/accuracies": 0.75, "rewards/chosen": -0.5476331114768982, "rewards/margins": 1.0728462934494019, "rewards/rejected": -1.6204794645309448, "step": 4609 }, { "epoch": 0.53, "learning_rate": 1.4271333255296733e-07, "logits/chosen": -3.693007469177246, "logits/rejected": -3.4511523246765137, "logps/chosen": -166.8677978515625, "logps/rejected": -145.77413940429688, "loss": 0.4142, "rewards/accuracies": 0.75, "rewards/chosen": -0.025151312351226807, "rewards/margins": 1.8659749031066895, "rewards/rejected": -1.8911261558532715, "step": 4610 }, { "epoch": 0.53, "learning_rate": 1.4267821608334308e-07, "logits/chosen": -2.1685128211975098, "logits/rejected": -2.5386693477630615, "logps/chosen": -185.9340057373047, "logps/rejected": -172.3708953857422, "loss": 0.6493, "rewards/accuracies": 0.5, "rewards/chosen": -0.36689960956573486, "rewards/margins": 0.34803205728530884, "rewards/rejected": -0.7149317264556885, "step": 4611 }, { "epoch": 0.53, "learning_rate": 1.4264309961371884e-07, "logits/chosen": -2.4270427227020264, "logits/rejected": -2.670897960662842, "logps/chosen": -163.20852661132812, "logps/rejected": -197.865234375, "loss": 0.2082, "rewards/accuracies": 1.0, "rewards/chosen": -0.2065826654434204, "rewards/margins": 1.8135071992874146, "rewards/rejected": -2.020089864730835, "step": 4612 }, { "epoch": 0.53, "learning_rate": 1.4260798314409457e-07, "logits/chosen": -3.1261074542999268, "logits/rejected": -3.315230369567871, "logps/chosen": -208.6081085205078, "logps/rejected": -170.54977416992188, "loss": 0.3615, "rewards/accuracies": 0.875, "rewards/chosen": -0.31942033767700195, "rewards/margins": 1.8757269382476807, "rewards/rejected": -2.1951472759246826, "step": 4613 }, { "epoch": 0.53, "learning_rate": 1.4257286667447032e-07, "logits/chosen": -4.088421821594238, "logits/rejected": -3.9290683269500732, "logps/chosen": -199.40283203125, "logps/rejected": -179.57009887695312, "loss": 0.48, "rewards/accuracies": 0.625, "rewards/chosen": -0.18187138438224792, "rewards/margins": 0.900641679763794, "rewards/rejected": -1.0825130939483643, "step": 4614 }, { "epoch": 0.53, "learning_rate": 1.4253775020484607e-07, "logits/chosen": -2.104647636413574, "logits/rejected": -2.253892421722412, "logps/chosen": -294.1556091308594, "logps/rejected": -257.3692321777344, "loss": 0.2571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3456215560436249, "rewards/margins": 2.419269323348999, "rewards/rejected": -2.0736477375030518, "step": 4615 }, { "epoch": 0.53, "learning_rate": 1.4250263373522183e-07, "logits/chosen": -3.243697166442871, "logits/rejected": -3.0998575687408447, "logps/chosen": -312.909423828125, "logps/rejected": -350.56201171875, "loss": 0.2439, "rewards/accuracies": 0.875, "rewards/chosen": 0.21444807946681976, "rewards/margins": 2.4560890197753906, "rewards/rejected": -2.241641044616699, "step": 4616 }, { "epoch": 0.53, "learning_rate": 1.4246751726559755e-07, "logits/chosen": -2.850947141647339, "logits/rejected": -3.095313787460327, "logps/chosen": -201.74822998046875, "logps/rejected": -220.62673950195312, "loss": 0.2127, "rewards/accuracies": 1.0, "rewards/chosen": -0.014567950740456581, "rewards/margins": 2.471315622329712, "rewards/rejected": -2.4858834743499756, "step": 4617 }, { "epoch": 0.53, "learning_rate": 1.424324007959733e-07, "logits/chosen": -2.916757583618164, "logits/rejected": -3.2335076332092285, "logps/chosen": -254.91783142089844, "logps/rejected": -292.67095947265625, "loss": 0.2285, "rewards/accuracies": 1.0, "rewards/chosen": -0.15013496577739716, "rewards/margins": 2.0953125953674316, "rewards/rejected": -2.2454476356506348, "step": 4618 }, { "epoch": 0.53, "learning_rate": 1.4239728432634904e-07, "logits/chosen": -3.587071180343628, "logits/rejected": -3.352964401245117, "logps/chosen": -162.939208984375, "logps/rejected": -234.41358947753906, "loss": 0.4668, "rewards/accuracies": 0.75, "rewards/chosen": -0.12905755639076233, "rewards/margins": 1.4858365058898926, "rewards/rejected": -1.6148940324783325, "step": 4619 }, { "epoch": 0.53, "learning_rate": 1.423621678567248e-07, "logits/chosen": -3.0941572189331055, "logits/rejected": -3.323101282119751, "logps/chosen": -304.724609375, "logps/rejected": -304.7298583984375, "loss": 0.8569, "rewards/accuracies": 0.625, "rewards/chosen": -0.23589180409908295, "rewards/margins": 1.6888132095336914, "rewards/rejected": -1.924704909324646, "step": 4620 }, { "epoch": 0.53, "learning_rate": 1.4232705138710054e-07, "logits/chosen": -3.1968417167663574, "logits/rejected": -3.2664029598236084, "logps/chosen": -250.17294311523438, "logps/rejected": -257.6095275878906, "loss": 0.4558, "rewards/accuracies": 0.875, "rewards/chosen": -0.31729021668434143, "rewards/margins": 0.8636941313743591, "rewards/rejected": -1.1809842586517334, "step": 4621 }, { "epoch": 0.53, "learning_rate": 1.422919349174763e-07, "logits/chosen": -2.7375307083129883, "logits/rejected": -2.786038398742676, "logps/chosen": -202.6448211669922, "logps/rejected": -323.6896057128906, "loss": 0.2909, "rewards/accuracies": 1.0, "rewards/chosen": 0.6523049473762512, "rewards/margins": 1.5118284225463867, "rewards/rejected": -0.8595235347747803, "step": 4622 }, { "epoch": 0.53, "learning_rate": 1.4225681844785202e-07, "logits/chosen": -2.834411144256592, "logits/rejected": -2.951763153076172, "logps/chosen": -204.31112670898438, "logps/rejected": -191.3324737548828, "loss": 0.676, "rewards/accuracies": 0.5, "rewards/chosen": -0.3486764132976532, "rewards/margins": 0.4741789400577545, "rewards/rejected": -0.8228553533554077, "step": 4623 }, { "epoch": 0.53, "learning_rate": 1.4222170197822778e-07, "logits/chosen": -3.4489760398864746, "logits/rejected": -3.5248024463653564, "logps/chosen": -195.9695587158203, "logps/rejected": -247.38052368164062, "loss": 0.3433, "rewards/accuracies": 0.875, "rewards/chosen": 0.3724173605442047, "rewards/margins": 1.3800979852676392, "rewards/rejected": -1.0076805353164673, "step": 4624 }, { "epoch": 0.53, "learning_rate": 1.4218658550860353e-07, "logits/chosen": -3.6186683177948, "logits/rejected": -3.325660467147827, "logps/chosen": -222.59072875976562, "logps/rejected": -133.26699829101562, "loss": 0.6922, "rewards/accuracies": 0.625, "rewards/chosen": -0.6611917018890381, "rewards/margins": 0.5636796951293945, "rewards/rejected": -1.2248713970184326, "step": 4625 }, { "epoch": 0.53, "learning_rate": 1.4215146903897926e-07, "logits/chosen": -3.1923749446868896, "logits/rejected": -3.6259548664093018, "logps/chosen": -220.82205200195312, "logps/rejected": -204.48068237304688, "loss": 0.6344, "rewards/accuracies": 0.75, "rewards/chosen": -0.2766581177711487, "rewards/margins": 0.924058735370636, "rewards/rejected": -1.2007167339324951, "step": 4626 }, { "epoch": 0.53, "learning_rate": 1.42116352569355e-07, "logits/chosen": -2.1793856620788574, "logits/rejected": -2.2384564876556396, "logps/chosen": -143.64585876464844, "logps/rejected": -208.00254821777344, "loss": 0.6703, "rewards/accuracies": 0.5, "rewards/chosen": -0.5607753396034241, "rewards/margins": 1.1841896772384644, "rewards/rejected": -1.7449650764465332, "step": 4627 }, { "epoch": 0.53, "learning_rate": 1.4208123609973077e-07, "logits/chosen": -3.1614902019500732, "logits/rejected": -3.391058921813965, "logps/chosen": -219.74337768554688, "logps/rejected": -257.330810546875, "loss": 0.2489, "rewards/accuracies": 0.875, "rewards/chosen": -0.05867205560207367, "rewards/margins": 1.9495136737823486, "rewards/rejected": -2.008185863494873, "step": 4628 }, { "epoch": 0.53, "learning_rate": 1.4204611963010652e-07, "logits/chosen": -3.5170469284057617, "logits/rejected": -3.4494740962982178, "logps/chosen": -145.37135314941406, "logps/rejected": -229.75003051757812, "loss": 0.3401, "rewards/accuracies": 0.875, "rewards/chosen": 0.1338469535112381, "rewards/margins": 1.5853161811828613, "rewards/rejected": -1.4514694213867188, "step": 4629 }, { "epoch": 0.53, "learning_rate": 1.4201100316048225e-07, "logits/chosen": -3.2329773902893066, "logits/rejected": -3.286543369293213, "logps/chosen": -181.6484375, "logps/rejected": -189.28201293945312, "loss": 0.4321, "rewards/accuracies": 0.75, "rewards/chosen": -0.346297025680542, "rewards/margins": 1.6266545057296753, "rewards/rejected": -1.9729516506195068, "step": 4630 }, { "epoch": 0.53, "learning_rate": 1.41975886690858e-07, "logits/chosen": -3.439216136932373, "logits/rejected": -3.405200481414795, "logps/chosen": -211.86582946777344, "logps/rejected": -310.40289306640625, "loss": 0.3858, "rewards/accuracies": 0.875, "rewards/chosen": 0.23384466767311096, "rewards/margins": 1.7470457553863525, "rewards/rejected": -1.5132009983062744, "step": 4631 }, { "epoch": 0.53, "learning_rate": 1.4194077022123376e-07, "logits/chosen": -4.04298210144043, "logits/rejected": -3.778897762298584, "logps/chosen": -132.88592529296875, "logps/rejected": -156.7513885498047, "loss": 0.4545, "rewards/accuracies": 0.75, "rewards/chosen": -0.24449820816516876, "rewards/margins": 0.9277173280715942, "rewards/rejected": -1.1722155809402466, "step": 4632 }, { "epoch": 0.53, "learning_rate": 1.419056537516095e-07, "logits/chosen": -3.053957462310791, "logits/rejected": -3.1718177795410156, "logps/chosen": -169.68153381347656, "logps/rejected": -204.7326202392578, "loss": 0.3637, "rewards/accuracies": 0.875, "rewards/chosen": 0.052358999848365784, "rewards/margins": 1.3198165893554688, "rewards/rejected": -1.2674574851989746, "step": 4633 }, { "epoch": 0.53, "learning_rate": 1.4187053728198524e-07, "logits/chosen": -3.364145278930664, "logits/rejected": -3.5119853019714355, "logps/chosen": -247.12188720703125, "logps/rejected": -287.3652038574219, "loss": 0.6167, "rewards/accuracies": 0.5, "rewards/chosen": -0.1498367339372635, "rewards/margins": 0.45358219742774963, "rewards/rejected": -0.6034189462661743, "step": 4634 }, { "epoch": 0.53, "learning_rate": 1.41835420812361e-07, "logits/chosen": -2.6197762489318848, "logits/rejected": -2.4730117321014404, "logps/chosen": -264.7226257324219, "logps/rejected": -211.59378051757812, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": 0.7698895931243896, "rewards/margins": 2.7609078884124756, "rewards/rejected": -1.991018295288086, "step": 4635 }, { "epoch": 0.53, "learning_rate": 1.4180030434273674e-07, "logits/chosen": -2.725533962249756, "logits/rejected": -2.8058531284332275, "logps/chosen": -171.79757690429688, "logps/rejected": -219.75584411621094, "loss": 0.3394, "rewards/accuracies": 0.75, "rewards/chosen": -0.1167459487915039, "rewards/margins": 2.7010581493377686, "rewards/rejected": -2.8178038597106934, "step": 4636 }, { "epoch": 0.53, "learning_rate": 1.4176518787311247e-07, "logits/chosen": -3.052982807159424, "logits/rejected": -3.0893144607543945, "logps/chosen": -255.37319946289062, "logps/rejected": -334.57379150390625, "loss": 0.9105, "rewards/accuracies": 0.875, "rewards/chosen": -0.4372723400592804, "rewards/margins": 1.8997199535369873, "rewards/rejected": -2.3369922637939453, "step": 4637 }, { "epoch": 0.53, "learning_rate": 1.4173007140348823e-07, "logits/chosen": -2.5956056118011475, "logits/rejected": -2.904731035232544, "logps/chosen": -249.20111083984375, "logps/rejected": -245.632568359375, "loss": 0.7877, "rewards/accuracies": 0.5, "rewards/chosen": -0.4067867398262024, "rewards/margins": 1.210875153541565, "rewards/rejected": -1.617661952972412, "step": 4638 }, { "epoch": 0.53, "learning_rate": 1.4169495493386398e-07, "logits/chosen": -3.4260201454162598, "logits/rejected": -3.7473998069763184, "logps/chosen": -214.485595703125, "logps/rejected": -365.0674743652344, "loss": 0.2743, "rewards/accuracies": 0.875, "rewards/chosen": -0.011061137542128563, "rewards/margins": 3.0875535011291504, "rewards/rejected": -3.0986146926879883, "step": 4639 }, { "epoch": 0.53, "learning_rate": 1.4165983846423973e-07, "logits/chosen": -2.5056331157684326, "logits/rejected": -2.765486240386963, "logps/chosen": -423.15118408203125, "logps/rejected": -337.60528564453125, "loss": 0.2667, "rewards/accuracies": 1.0, "rewards/chosen": 0.008405506610870361, "rewards/margins": 2.8290276527404785, "rewards/rejected": -2.820622205734253, "step": 4640 }, { "epoch": 0.54, "learning_rate": 1.4162472199461546e-07, "logits/chosen": -3.0209665298461914, "logits/rejected": -2.8075129985809326, "logps/chosen": -334.4495849609375, "logps/rejected": -214.6701202392578, "loss": 0.3539, "rewards/accuracies": 0.75, "rewards/chosen": -0.49488046765327454, "rewards/margins": 1.2564538717269897, "rewards/rejected": -1.7513344287872314, "step": 4641 }, { "epoch": 0.54, "learning_rate": 1.4158960552499122e-07, "logits/chosen": -3.435163974761963, "logits/rejected": -3.3027267456054688, "logps/chosen": -325.6151123046875, "logps/rejected": -243.59970092773438, "loss": 0.7463, "rewards/accuracies": 0.625, "rewards/chosen": -0.48360806703567505, "rewards/margins": 0.5647733211517334, "rewards/rejected": -1.0483814477920532, "step": 4642 }, { "epoch": 0.54, "learning_rate": 1.4155448905536694e-07, "logits/chosen": -3.4138598442077637, "logits/rejected": -3.23895525932312, "logps/chosen": -274.93743896484375, "logps/rejected": -279.0364990234375, "loss": 0.4294, "rewards/accuracies": 0.75, "rewards/chosen": -0.5230594873428345, "rewards/margins": 2.832472801208496, "rewards/rejected": -3.35553240776062, "step": 4643 }, { "epoch": 0.54, "learning_rate": 1.4151937258574272e-07, "logits/chosen": -2.8412296772003174, "logits/rejected": -2.6380763053894043, "logps/chosen": -221.533447265625, "logps/rejected": -192.26138305664062, "loss": 0.4012, "rewards/accuracies": 0.625, "rewards/chosen": 0.3895959258079529, "rewards/margins": 1.2296205759048462, "rewards/rejected": -0.8400247097015381, "step": 4644 }, { "epoch": 0.54, "learning_rate": 1.4148425611611845e-07, "logits/chosen": -3.062307119369507, "logits/rejected": -3.032033920288086, "logps/chosen": -183.82199096679688, "logps/rejected": -188.78472900390625, "loss": 0.3519, "rewards/accuracies": 0.875, "rewards/chosen": 0.007539689540863037, "rewards/margins": 1.270946979522705, "rewards/rejected": -1.2634073495864868, "step": 4645 }, { "epoch": 0.54, "learning_rate": 1.414491396464942e-07, "logits/chosen": -3.6543631553649902, "logits/rejected": -3.7638378143310547, "logps/chosen": -177.5653076171875, "logps/rejected": -213.0252227783203, "loss": 0.2569, "rewards/accuracies": 0.875, "rewards/chosen": 0.321727991104126, "rewards/margins": 3.050652265548706, "rewards/rejected": -2.728924512863159, "step": 4646 }, { "epoch": 0.54, "learning_rate": 1.4141402317686993e-07, "logits/chosen": -3.4963760375976562, "logits/rejected": -3.193756103515625, "logps/chosen": -186.1361541748047, "logps/rejected": -177.0031280517578, "loss": 1.0611, "rewards/accuracies": 0.25, "rewards/chosen": -1.0598514080047607, "rewards/margins": 0.10249119997024536, "rewards/rejected": -1.1623425483703613, "step": 4647 }, { "epoch": 0.54, "learning_rate": 1.413789067072457e-07, "logits/chosen": -3.4884800910949707, "logits/rejected": -3.5109357833862305, "logps/chosen": -266.54388427734375, "logps/rejected": -261.41131591796875, "loss": 0.596, "rewards/accuracies": 0.875, "rewards/chosen": -0.20411443710327148, "rewards/margins": 1.427620530128479, "rewards/rejected": -1.6317349672317505, "step": 4648 }, { "epoch": 0.54, "learning_rate": 1.4134379023762144e-07, "logits/chosen": -3.406165838241577, "logits/rejected": -3.526381015777588, "logps/chosen": -105.44722747802734, "logps/rejected": -140.10626220703125, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": 0.1854010820388794, "rewards/margins": 1.8816380500793457, "rewards/rejected": -1.6962368488311768, "step": 4649 }, { "epoch": 0.54, "learning_rate": 1.413086737679972e-07, "logits/chosen": -3.075880765914917, "logits/rejected": -3.1867194175720215, "logps/chosen": -226.75888061523438, "logps/rejected": -270.6036376953125, "loss": 0.6404, "rewards/accuracies": 0.75, "rewards/chosen": -0.21310710906982422, "rewards/margins": 1.0599406957626343, "rewards/rejected": -1.273047685623169, "step": 4650 }, { "epoch": 0.54, "learning_rate": 1.4127355729837292e-07, "logits/chosen": -2.968837022781372, "logits/rejected": -2.864225387573242, "logps/chosen": -276.084716796875, "logps/rejected": -237.89459228515625, "loss": 0.5462, "rewards/accuracies": 0.875, "rewards/chosen": -0.11995664238929749, "rewards/margins": 0.638728141784668, "rewards/rejected": -0.7586847543716431, "step": 4651 }, { "epoch": 0.54, "learning_rate": 1.4123844082874867e-07, "logits/chosen": -3.6682209968566895, "logits/rejected": -3.6186184883117676, "logps/chosen": -168.29066467285156, "logps/rejected": -130.3525390625, "loss": 0.4596, "rewards/accuracies": 0.75, "rewards/chosen": -0.44341227412223816, "rewards/margins": 1.503288984298706, "rewards/rejected": -1.946701169013977, "step": 4652 }, { "epoch": 0.54, "learning_rate": 1.4120332435912443e-07, "logits/chosen": -2.894619941711426, "logits/rejected": -2.8599884510040283, "logps/chosen": -150.656005859375, "logps/rejected": -185.25869750976562, "loss": 0.3589, "rewards/accuracies": 0.75, "rewards/chosen": -0.08228855580091476, "rewards/margins": 2.2661499977111816, "rewards/rejected": -2.3484387397766113, "step": 4653 }, { "epoch": 0.54, "learning_rate": 1.4116820788950016e-07, "logits/chosen": -4.018063068389893, "logits/rejected": -3.637143611907959, "logps/chosen": -270.9552917480469, "logps/rejected": -244.3907470703125, "loss": 0.2357, "rewards/accuracies": 0.875, "rewards/chosen": -0.04697135090827942, "rewards/margins": 2.9214859008789062, "rewards/rejected": -2.9684574604034424, "step": 4654 }, { "epoch": 0.54, "learning_rate": 1.411330914198759e-07, "logits/chosen": -2.8683011531829834, "logits/rejected": -2.807751178741455, "logps/chosen": -132.15916442871094, "logps/rejected": -129.3094482421875, "loss": 0.473, "rewards/accuracies": 0.875, "rewards/chosen": 0.3225565552711487, "rewards/margins": 1.3265774250030518, "rewards/rejected": -1.0040208101272583, "step": 4655 }, { "epoch": 0.54, "learning_rate": 1.4109797495025166e-07, "logits/chosen": -2.4702959060668945, "logits/rejected": -2.5995278358459473, "logps/chosen": -285.7110595703125, "logps/rejected": -237.1140594482422, "loss": 0.3736, "rewards/accuracies": 0.875, "rewards/chosen": 0.1175636500120163, "rewards/margins": 1.5881903171539307, "rewards/rejected": -1.4706265926361084, "step": 4656 }, { "epoch": 0.54, "learning_rate": 1.4106285848062742e-07, "logits/chosen": -3.650271415710449, "logits/rejected": -3.368236541748047, "logps/chosen": -250.63536071777344, "logps/rejected": -226.20860290527344, "loss": 0.3522, "rewards/accuracies": 0.875, "rewards/chosen": -0.35954898595809937, "rewards/margins": 1.8012950420379639, "rewards/rejected": -2.160843849182129, "step": 4657 }, { "epoch": 0.54, "learning_rate": 1.4102774201100314e-07, "logits/chosen": -2.871408700942993, "logits/rejected": -3.0128707885742188, "logps/chosen": -254.10948181152344, "logps/rejected": -275.2237243652344, "loss": 0.4799, "rewards/accuracies": 0.625, "rewards/chosen": 0.2701619565486908, "rewards/margins": 1.6599647998809814, "rewards/rejected": -1.3898028135299683, "step": 4658 }, { "epoch": 0.54, "learning_rate": 1.409926255413789e-07, "logits/chosen": -2.9990861415863037, "logits/rejected": -2.586759328842163, "logps/chosen": -212.7902374267578, "logps/rejected": -194.28961181640625, "loss": 0.2141, "rewards/accuracies": 1.0, "rewards/chosen": 0.29550760984420776, "rewards/margins": 2.437692642211914, "rewards/rejected": -2.1421849727630615, "step": 4659 }, { "epoch": 0.54, "learning_rate": 1.4095750907175465e-07, "logits/chosen": -2.8536975383758545, "logits/rejected": -2.702028751373291, "logps/chosen": -308.3877258300781, "logps/rejected": -301.6170959472656, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": -0.08953350782394409, "rewards/margins": 1.291090726852417, "rewards/rejected": -1.3806240558624268, "step": 4660 }, { "epoch": 0.54, "learning_rate": 1.409223926021304e-07, "logits/chosen": -2.7628026008605957, "logits/rejected": -3.0002951622009277, "logps/chosen": -350.18115234375, "logps/rejected": -236.19769287109375, "loss": 0.3732, "rewards/accuracies": 0.875, "rewards/chosen": -0.07502049207687378, "rewards/margins": 1.2234488725662231, "rewards/rejected": -1.2984693050384521, "step": 4661 }, { "epoch": 0.54, "learning_rate": 1.4088727613250613e-07, "logits/chosen": -3.0279855728149414, "logits/rejected": -3.0245797634124756, "logps/chosen": -317.2729187011719, "logps/rejected": -235.84738159179688, "loss": 0.4762, "rewards/accuracies": 0.875, "rewards/chosen": -0.09522318840026855, "rewards/margins": 1.5873258113861084, "rewards/rejected": -1.682548999786377, "step": 4662 }, { "epoch": 0.54, "learning_rate": 1.408521596628819e-07, "logits/chosen": -2.8225667476654053, "logits/rejected": -2.847048282623291, "logps/chosen": -185.09768676757812, "logps/rejected": -231.48963928222656, "loss": 0.4843, "rewards/accuracies": 0.625, "rewards/chosen": 0.1774609088897705, "rewards/margins": 1.846146821975708, "rewards/rejected": -1.6686859130859375, "step": 4663 }, { "epoch": 0.54, "learning_rate": 1.4081704319325764e-07, "logits/chosen": -3.5010719299316406, "logits/rejected": -3.5061142444610596, "logps/chosen": -252.60552978515625, "logps/rejected": -257.0217590332031, "loss": 0.7429, "rewards/accuracies": 0.625, "rewards/chosen": -0.903741717338562, "rewards/margins": 0.6754860877990723, "rewards/rejected": -1.5792276859283447, "step": 4664 }, { "epoch": 0.54, "learning_rate": 1.407819267236334e-07, "logits/chosen": -2.9090185165405273, "logits/rejected": -2.731482744216919, "logps/chosen": -144.25259399414062, "logps/rejected": -199.80506896972656, "loss": 0.3782, "rewards/accuracies": 0.625, "rewards/chosen": 0.35093986988067627, "rewards/margins": 1.4023282527923584, "rewards/rejected": -1.0513883829116821, "step": 4665 }, { "epoch": 0.54, "learning_rate": 1.4074681025400912e-07, "logits/chosen": -2.934305191040039, "logits/rejected": -2.8084001541137695, "logps/chosen": -231.37936401367188, "logps/rejected": -273.19134521484375, "loss": 0.2143, "rewards/accuracies": 0.875, "rewards/chosen": 0.22984401881694794, "rewards/margins": 2.327094316482544, "rewards/rejected": -2.097250461578369, "step": 4666 }, { "epoch": 0.54, "learning_rate": 1.4071169378438488e-07, "logits/chosen": -2.7702255249023438, "logits/rejected": -2.746880292892456, "logps/chosen": -242.52320861816406, "logps/rejected": -218.83184814453125, "loss": 0.3993, "rewards/accuracies": 0.875, "rewards/chosen": 0.11429911106824875, "rewards/margins": 1.9123032093048096, "rewards/rejected": -1.798004150390625, "step": 4667 }, { "epoch": 0.54, "learning_rate": 1.406765773147606e-07, "logits/chosen": -3.2397429943084717, "logits/rejected": -3.1407721042633057, "logps/chosen": -246.2645263671875, "logps/rejected": -201.72218322753906, "loss": 0.4119, "rewards/accuracies": 0.875, "rewards/chosen": -0.10836967080831528, "rewards/margins": 0.9455664157867432, "rewards/rejected": -1.0539361238479614, "step": 4668 }, { "epoch": 0.54, "learning_rate": 1.4064146084513636e-07, "logits/chosen": -3.1567304134368896, "logits/rejected": -3.0415146350860596, "logps/chosen": -297.19036865234375, "logps/rejected": -330.3018798828125, "loss": 0.4335, "rewards/accuracies": 0.75, "rewards/chosen": -0.2226795256137848, "rewards/margins": 0.91374671459198, "rewards/rejected": -1.136426329612732, "step": 4669 }, { "epoch": 0.54, "learning_rate": 1.406063443755121e-07, "logits/chosen": -3.565927743911743, "logits/rejected": -3.1886425018310547, "logps/chosen": -293.0825500488281, "logps/rejected": -215.7290802001953, "loss": 0.2989, "rewards/accuracies": 0.75, "rewards/chosen": 0.19525247812271118, "rewards/margins": 2.457744836807251, "rewards/rejected": -2.2624924182891846, "step": 4670 }, { "epoch": 0.54, "learning_rate": 1.4057122790588784e-07, "logits/chosen": -2.6605098247528076, "logits/rejected": -2.2641890048980713, "logps/chosen": -243.30657958984375, "logps/rejected": -273.71002197265625, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": -0.2926764190196991, "rewards/margins": 2.197005271911621, "rewards/rejected": -2.4896817207336426, "step": 4671 }, { "epoch": 0.54, "learning_rate": 1.405361114362636e-07, "logits/chosen": -2.384458541870117, "logits/rejected": -2.525826930999756, "logps/chosen": -319.62860107421875, "logps/rejected": -322.22601318359375, "loss": 0.2194, "rewards/accuracies": 0.875, "rewards/chosen": 0.4804350733757019, "rewards/margins": 2.4036972522735596, "rewards/rejected": -1.923262357711792, "step": 4672 }, { "epoch": 0.54, "learning_rate": 1.4050099496663935e-07, "logits/chosen": -3.8208842277526855, "logits/rejected": -3.843156337738037, "logps/chosen": -268.55963134765625, "logps/rejected": -259.74188232421875, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": -0.2539820671081543, "rewards/margins": 3.4758763313293457, "rewards/rejected": -3.7298583984375, "step": 4673 }, { "epoch": 0.54, "learning_rate": 1.404658784970151e-07, "logits/chosen": -2.485898733139038, "logits/rejected": -2.3794684410095215, "logps/chosen": -439.56707763671875, "logps/rejected": -417.7510986328125, "loss": 0.3646, "rewards/accuracies": 0.875, "rewards/chosen": 0.37318718433380127, "rewards/margins": 1.3578860759735107, "rewards/rejected": -0.9846988916397095, "step": 4674 }, { "epoch": 0.54, "learning_rate": 1.4043076202739083e-07, "logits/chosen": -3.1545543670654297, "logits/rejected": -3.2717788219451904, "logps/chosen": -230.1149139404297, "logps/rejected": -251.31527709960938, "loss": 0.5655, "rewards/accuracies": 0.75, "rewards/chosen": 0.009678855538368225, "rewards/margins": 2.41192364692688, "rewards/rejected": -2.402245044708252, "step": 4675 }, { "epoch": 0.54, "learning_rate": 1.4039564555776658e-07, "logits/chosen": -2.5920634269714355, "logits/rejected": -2.561222553253174, "logps/chosen": -245.43800354003906, "logps/rejected": -330.5764465332031, "loss": 0.4574, "rewards/accuracies": 0.75, "rewards/chosen": -0.10159578919410706, "rewards/margins": 2.0351943969726562, "rewards/rejected": -2.1367902755737305, "step": 4676 }, { "epoch": 0.54, "learning_rate": 1.4036052908814234e-07, "logits/chosen": -3.501150608062744, "logits/rejected": -3.8247456550598145, "logps/chosen": -160.11151123046875, "logps/rejected": -229.1749267578125, "loss": 0.4396, "rewards/accuracies": 0.875, "rewards/chosen": -0.023726943880319595, "rewards/margins": 1.1383543014526367, "rewards/rejected": -1.162081241607666, "step": 4677 }, { "epoch": 0.54, "learning_rate": 1.403254126185181e-07, "logits/chosen": -3.170712947845459, "logits/rejected": -2.811497926712036, "logps/chosen": -148.25515747070312, "logps/rejected": -279.9303894042969, "loss": 0.7245, "rewards/accuracies": 0.625, "rewards/chosen": -0.2003413587808609, "rewards/margins": 0.8889018297195435, "rewards/rejected": -1.0892431735992432, "step": 4678 }, { "epoch": 0.54, "learning_rate": 1.4029029614889382e-07, "logits/chosen": -2.363435745239258, "logits/rejected": -2.544154644012451, "logps/chosen": -309.96612548828125, "logps/rejected": -437.32122802734375, "loss": 0.5681, "rewards/accuracies": 0.625, "rewards/chosen": -0.3384389877319336, "rewards/margins": 1.9136712551116943, "rewards/rejected": -2.252110242843628, "step": 4679 }, { "epoch": 0.54, "learning_rate": 1.4025517967926957e-07, "logits/chosen": -3.5231094360351562, "logits/rejected": -3.4379916191101074, "logps/chosen": -195.85107421875, "logps/rejected": -163.14642333984375, "loss": 0.9995, "rewards/accuracies": 0.5, "rewards/chosen": -0.7417532205581665, "rewards/margins": 0.7241140007972717, "rewards/rejected": -1.465867280960083, "step": 4680 }, { "epoch": 0.54, "learning_rate": 1.4022006320964532e-07, "logits/chosen": -3.1691508293151855, "logits/rejected": -3.076690435409546, "logps/chosen": -440.95489501953125, "logps/rejected": -501.3960266113281, "loss": 0.357, "rewards/accuracies": 0.875, "rewards/chosen": -0.11357621848583221, "rewards/margins": 1.9033855199813843, "rewards/rejected": -2.0169615745544434, "step": 4681 }, { "epoch": 0.54, "learning_rate": 1.4018494674002108e-07, "logits/chosen": -1.9653334617614746, "logits/rejected": -2.0161287784576416, "logps/chosen": -425.2652587890625, "logps/rejected": -312.5126037597656, "loss": 0.3806, "rewards/accuracies": 1.0, "rewards/chosen": 0.23846927285194397, "rewards/margins": 1.0259150266647339, "rewards/rejected": -0.7874457836151123, "step": 4682 }, { "epoch": 0.54, "learning_rate": 1.401498302703968e-07, "logits/chosen": -2.9213643074035645, "logits/rejected": -3.507218837738037, "logps/chosen": -127.2892074584961, "logps/rejected": -186.74868774414062, "loss": 0.557, "rewards/accuracies": 0.75, "rewards/chosen": -0.21131734549999237, "rewards/margins": 3.1384501457214355, "rewards/rejected": -3.3497676849365234, "step": 4683 }, { "epoch": 0.54, "learning_rate": 1.4011471380077256e-07, "logits/chosen": -3.2747092247009277, "logits/rejected": -3.0826008319854736, "logps/chosen": -193.3975372314453, "logps/rejected": -153.68399047851562, "loss": 0.2664, "rewards/accuracies": 0.875, "rewards/chosen": -0.21724870800971985, "rewards/margins": 1.5708012580871582, "rewards/rejected": -1.7880499362945557, "step": 4684 }, { "epoch": 0.54, "learning_rate": 1.400795973311483e-07, "logits/chosen": -2.5389785766601562, "logits/rejected": -2.555305242538452, "logps/chosen": -217.6708984375, "logps/rejected": -218.95616149902344, "loss": 0.5918, "rewards/accuracies": 0.75, "rewards/chosen": -0.33195197582244873, "rewards/margins": 0.6003660559654236, "rewards/rejected": -0.9323179721832275, "step": 4685 }, { "epoch": 0.54, "learning_rate": 1.4004448086152404e-07, "logits/chosen": -2.608656883239746, "logits/rejected": -2.7045297622680664, "logps/chosen": -420.5458984375, "logps/rejected": -375.48162841796875, "loss": 0.6963, "rewards/accuracies": 0.5, "rewards/chosen": -0.37921661138534546, "rewards/margins": 0.7585627436637878, "rewards/rejected": -1.1377793550491333, "step": 4686 }, { "epoch": 0.54, "learning_rate": 1.400093643918998e-07, "logits/chosen": -3.3471102714538574, "logits/rejected": -3.0839757919311523, "logps/chosen": -293.6072998046875, "logps/rejected": -372.4629821777344, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": 0.17730873823165894, "rewards/margins": 3.623513698577881, "rewards/rejected": -3.446204662322998, "step": 4687 }, { "epoch": 0.54, "learning_rate": 1.3997424792227552e-07, "logits/chosen": -3.351966142654419, "logits/rejected": -3.410693407058716, "logps/chosen": -260.5738220214844, "logps/rejected": -240.7497100830078, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": -0.1470785290002823, "rewards/margins": 2.271552562713623, "rewards/rejected": -2.418631076812744, "step": 4688 }, { "epoch": 0.54, "learning_rate": 1.399391314526513e-07, "logits/chosen": -3.0154452323913574, "logits/rejected": -3.4063873291015625, "logps/chosen": -231.49441528320312, "logps/rejected": -323.3878173828125, "loss": 0.3712, "rewards/accuracies": 0.875, "rewards/chosen": -0.7253485918045044, "rewards/margins": 2.124648094177246, "rewards/rejected": -2.849996566772461, "step": 4689 }, { "epoch": 0.54, "learning_rate": 1.3990401498302703e-07, "logits/chosen": -2.568394899368286, "logits/rejected": -2.6165900230407715, "logps/chosen": -182.81642150878906, "logps/rejected": -207.80699157714844, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -0.009611807763576508, "rewards/margins": 3.1242194175720215, "rewards/rejected": -3.13383150100708, "step": 4690 }, { "epoch": 0.54, "learning_rate": 1.3986889851340278e-07, "logits/chosen": -2.581697940826416, "logits/rejected": -2.884397506713867, "logps/chosen": -354.98187255859375, "logps/rejected": -253.52713012695312, "loss": 0.6134, "rewards/accuracies": 0.625, "rewards/chosen": -0.22055235505104065, "rewards/margins": 1.9010350704193115, "rewards/rejected": -2.1215872764587402, "step": 4691 }, { "epoch": 0.54, "learning_rate": 1.398337820437785e-07, "logits/chosen": -3.029862880706787, "logits/rejected": -2.9398303031921387, "logps/chosen": -278.1956481933594, "logps/rejected": -317.8135681152344, "loss": 0.4342, "rewards/accuracies": 0.625, "rewards/chosen": -0.10420399904251099, "rewards/margins": 1.1984055042266846, "rewards/rejected": -1.3026095628738403, "step": 4692 }, { "epoch": 0.54, "learning_rate": 1.397986655741543e-07, "logits/chosen": -3.699066162109375, "logits/rejected": -3.2209718227386475, "logps/chosen": -431.55755615234375, "logps/rejected": -289.0267333984375, "loss": 0.2514, "rewards/accuracies": 1.0, "rewards/chosen": -0.07281896471977234, "rewards/margins": 2.0119473934173584, "rewards/rejected": -2.084766387939453, "step": 4693 }, { "epoch": 0.54, "learning_rate": 1.3976354910453002e-07, "logits/chosen": -3.0211410522460938, "logits/rejected": -2.8240106105804443, "logps/chosen": -419.4019775390625, "logps/rejected": -634.1710815429688, "loss": 0.5182, "rewards/accuracies": 0.75, "rewards/chosen": 0.10153341293334961, "rewards/margins": 1.6351358890533447, "rewards/rejected": -1.5336024761199951, "step": 4694 }, { "epoch": 0.54, "learning_rate": 1.3972843263490577e-07, "logits/chosen": -3.682283639907837, "logits/rejected": -3.818006992340088, "logps/chosen": -123.84111022949219, "logps/rejected": -173.22438049316406, "loss": 0.5292, "rewards/accuracies": 0.625, "rewards/chosen": -0.5851384401321411, "rewards/margins": 0.47670483589172363, "rewards/rejected": -1.0618432760238647, "step": 4695 }, { "epoch": 0.54, "learning_rate": 1.396933161652815e-07, "logits/chosen": -3.3117833137512207, "logits/rejected": -3.4310519695281982, "logps/chosen": -213.0489959716797, "logps/rejected": -242.7095489501953, "loss": 0.2543, "rewards/accuracies": 1.0, "rewards/chosen": -0.3056524693965912, "rewards/margins": 1.458221435546875, "rewards/rejected": -1.7638740539550781, "step": 4696 }, { "epoch": 0.54, "learning_rate": 1.3965819969565725e-07, "logits/chosen": -3.2982993125915527, "logits/rejected": -3.339001178741455, "logps/chosen": -156.62362670898438, "logps/rejected": -141.66297912597656, "loss": 0.4612, "rewards/accuracies": 0.875, "rewards/chosen": -0.2686714231967926, "rewards/margins": 0.6488252282142639, "rewards/rejected": -0.9174966812133789, "step": 4697 }, { "epoch": 0.54, "learning_rate": 1.39623083226033e-07, "logits/chosen": -3.4820759296417236, "logits/rejected": -3.251004219055176, "logps/chosen": -150.22772216796875, "logps/rejected": -202.87637329101562, "loss": 0.444, "rewards/accuracies": 0.75, "rewards/chosen": -0.2894437313079834, "rewards/margins": 1.8850950002670288, "rewards/rejected": -2.1745388507843018, "step": 4698 }, { "epoch": 0.54, "learning_rate": 1.3958796675640876e-07, "logits/chosen": -2.736966609954834, "logits/rejected": -2.730278491973877, "logps/chosen": -154.86595153808594, "logps/rejected": -180.88189697265625, "loss": 0.3503, "rewards/accuracies": 0.75, "rewards/chosen": 0.11602663993835449, "rewards/margins": 1.5858906507492065, "rewards/rejected": -1.4698638916015625, "step": 4699 }, { "epoch": 0.54, "learning_rate": 1.395528502867845e-07, "logits/chosen": -3.2960777282714844, "logits/rejected": -3.3512511253356934, "logps/chosen": -325.6885986328125, "logps/rejected": -362.6979675292969, "loss": 0.2672, "rewards/accuracies": 1.0, "rewards/chosen": 0.23892849683761597, "rewards/margins": 1.7996593713760376, "rewards/rejected": -1.5607309341430664, "step": 4700 }, { "epoch": 0.54, "learning_rate": 1.3951773381716024e-07, "logits/chosen": -2.905561923980713, "logits/rejected": -3.065286874771118, "logps/chosen": -196.11239624023438, "logps/rejected": -187.29652404785156, "loss": 0.376, "rewards/accuracies": 0.75, "rewards/chosen": 0.03573183715343475, "rewards/margins": 1.6489616632461548, "rewards/rejected": -1.6132298707962036, "step": 4701 }, { "epoch": 0.54, "learning_rate": 1.39482617347536e-07, "logits/chosen": -2.988740921020508, "logits/rejected": -2.919459342956543, "logps/chosen": -140.0156707763672, "logps/rejected": -194.56871032714844, "loss": 0.3494, "rewards/accuracies": 0.75, "rewards/chosen": -0.0001882016658782959, "rewards/margins": 2.126237630844116, "rewards/rejected": -2.1264257431030273, "step": 4702 }, { "epoch": 0.54, "learning_rate": 1.3944750087791172e-07, "logits/chosen": -4.135402679443359, "logits/rejected": -4.040011405944824, "logps/chosen": -203.88528442382812, "logps/rejected": -225.95120239257812, "loss": 0.2302, "rewards/accuracies": 0.875, "rewards/chosen": 0.05629311501979828, "rewards/margins": 2.4498515129089355, "rewards/rejected": -2.3935585021972656, "step": 4703 }, { "epoch": 0.54, "learning_rate": 1.3941238440828748e-07, "logits/chosen": -3.3013529777526855, "logits/rejected": -3.5394821166992188, "logps/chosen": -261.58856201171875, "logps/rejected": -266.6440734863281, "loss": 0.2494, "rewards/accuracies": 0.875, "rewards/chosen": 0.0376463383436203, "rewards/margins": 2.8416640758514404, "rewards/rejected": -2.8040177822113037, "step": 4704 }, { "epoch": 0.54, "learning_rate": 1.3937726793866323e-07, "logits/chosen": -3.944586753845215, "logits/rejected": -3.557952880859375, "logps/chosen": -454.6298828125, "logps/rejected": -387.8685302734375, "loss": 0.291, "rewards/accuracies": 0.875, "rewards/chosen": -0.2164437472820282, "rewards/margins": 3.886897087097168, "rewards/rejected": -4.1033406257629395, "step": 4705 }, { "epoch": 0.54, "learning_rate": 1.3934215146903899e-07, "logits/chosen": -3.022952079772949, "logits/rejected": -2.8368983268737793, "logps/chosen": -464.324462890625, "logps/rejected": -285.7774963378906, "loss": 0.6515, "rewards/accuracies": 0.75, "rewards/chosen": -0.19956420361995697, "rewards/margins": 0.834123432636261, "rewards/rejected": -1.0336875915527344, "step": 4706 }, { "epoch": 0.54, "learning_rate": 1.393070349994147e-07, "logits/chosen": -3.2620513439178467, "logits/rejected": -3.228442668914795, "logps/chosen": -401.785888671875, "logps/rejected": -340.40545654296875, "loss": 0.2517, "rewards/accuracies": 0.875, "rewards/chosen": 0.48069310188293457, "rewards/margins": 2.308873414993286, "rewards/rejected": -1.8281803131103516, "step": 4707 }, { "epoch": 0.54, "learning_rate": 1.3927191852979047e-07, "logits/chosen": -2.3521177768707275, "logits/rejected": -2.612572193145752, "logps/chosen": -168.17086791992188, "logps/rejected": -160.645751953125, "loss": 0.4614, "rewards/accuracies": 0.875, "rewards/chosen": -0.16594013571739197, "rewards/margins": 0.9671852588653564, "rewards/rejected": -1.1331254243850708, "step": 4708 }, { "epoch": 0.54, "learning_rate": 1.3923680206016622e-07, "logits/chosen": -3.1569342613220215, "logits/rejected": -3.1778571605682373, "logps/chosen": -246.5291748046875, "logps/rejected": -158.7622833251953, "loss": 0.4864, "rewards/accuracies": 0.625, "rewards/chosen": -0.20952104032039642, "rewards/margins": 1.1787139177322388, "rewards/rejected": -1.3882349729537964, "step": 4709 }, { "epoch": 0.54, "learning_rate": 1.3920168559054197e-07, "logits/chosen": -2.6276261806488037, "logits/rejected": -2.856135368347168, "logps/chosen": -382.7586975097656, "logps/rejected": -225.76675415039062, "loss": 0.1615, "rewards/accuracies": 1.0, "rewards/chosen": 0.3186935782432556, "rewards/margins": 2.545628786087036, "rewards/rejected": -2.2269351482391357, "step": 4710 }, { "epoch": 0.54, "learning_rate": 1.391665691209177e-07, "logits/chosen": -3.2601020336151123, "logits/rejected": -3.4754204750061035, "logps/chosen": -261.68865966796875, "logps/rejected": -290.8757019042969, "loss": 0.2334, "rewards/accuracies": 0.875, "rewards/chosen": 0.298372358083725, "rewards/margins": 2.884760856628418, "rewards/rejected": -2.58638858795166, "step": 4711 }, { "epoch": 0.54, "learning_rate": 1.3913145265129346e-07, "logits/chosen": -2.970379114151001, "logits/rejected": -2.7609448432922363, "logps/chosen": -323.9457092285156, "logps/rejected": -220.30023193359375, "loss": 0.682, "rewards/accuracies": 0.625, "rewards/chosen": -0.30516403913497925, "rewards/margins": 1.4469820261001587, "rewards/rejected": -1.7521458864212036, "step": 4712 }, { "epoch": 0.54, "learning_rate": 1.3909633618166918e-07, "logits/chosen": -2.671915054321289, "logits/rejected": -2.7442636489868164, "logps/chosen": -319.6053161621094, "logps/rejected": -272.1247253417969, "loss": 0.1853, "rewards/accuracies": 1.0, "rewards/chosen": 0.502479612827301, "rewards/margins": 2.685864210128784, "rewards/rejected": -2.183384656906128, "step": 4713 }, { "epoch": 0.54, "learning_rate": 1.3906121971204494e-07, "logits/chosen": -2.9911317825317383, "logits/rejected": -3.1705493927001953, "logps/chosen": -156.8232421875, "logps/rejected": -212.71681213378906, "loss": 0.4764, "rewards/accuracies": 0.75, "rewards/chosen": 0.42897093296051025, "rewards/margins": 1.50592839717865, "rewards/rejected": -1.0769574642181396, "step": 4714 }, { "epoch": 0.54, "learning_rate": 1.390261032424207e-07, "logits/chosen": -2.5154919624328613, "logits/rejected": -2.77345871925354, "logps/chosen": -105.4158935546875, "logps/rejected": -149.5496826171875, "loss": 0.2542, "rewards/accuracies": 0.875, "rewards/chosen": 0.6032509207725525, "rewards/margins": 2.117307186126709, "rewards/rejected": -1.5140562057495117, "step": 4715 }, { "epoch": 0.54, "learning_rate": 1.3899098677279644e-07, "logits/chosen": -2.1190104484558105, "logits/rejected": -2.1633810997009277, "logps/chosen": -342.3714599609375, "logps/rejected": -294.890869140625, "loss": 0.8875, "rewards/accuracies": 0.625, "rewards/chosen": -0.9499032497406006, "rewards/margins": 0.37363824248313904, "rewards/rejected": -1.3235414028167725, "step": 4716 }, { "epoch": 0.54, "learning_rate": 1.3895587030317217e-07, "logits/chosen": -2.767604112625122, "logits/rejected": -2.847093105316162, "logps/chosen": -362.0243225097656, "logps/rejected": -306.44097900390625, "loss": 0.2693, "rewards/accuracies": 1.0, "rewards/chosen": -0.1335172802209854, "rewards/margins": 1.8291388750076294, "rewards/rejected": -1.962656021118164, "step": 4717 }, { "epoch": 0.54, "learning_rate": 1.3892075383354793e-07, "logits/chosen": -3.319711923599243, "logits/rejected": -3.5005030632019043, "logps/chosen": -156.6175079345703, "logps/rejected": -214.26315307617188, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": 0.03350120782852173, "rewards/margins": 1.8960381746292114, "rewards/rejected": -1.8625370264053345, "step": 4718 }, { "epoch": 0.54, "learning_rate": 1.3888563736392368e-07, "logits/chosen": -2.622309446334839, "logits/rejected": -2.633680582046509, "logps/chosen": -233.0819091796875, "logps/rejected": -139.0605010986328, "loss": 0.4468, "rewards/accuracies": 0.625, "rewards/chosen": 0.040934622287750244, "rewards/margins": 1.4217700958251953, "rewards/rejected": -1.3808354139328003, "step": 4719 }, { "epoch": 0.54, "learning_rate": 1.388505208942994e-07, "logits/chosen": -3.418431520462036, "logits/rejected": -3.195725440979004, "logps/chosen": -245.3814239501953, "logps/rejected": -210.36465454101562, "loss": 0.5691, "rewards/accuracies": 0.625, "rewards/chosen": -0.0925130769610405, "rewards/margins": 1.1795004606246948, "rewards/rejected": -1.2720136642456055, "step": 4720 }, { "epoch": 0.54, "learning_rate": 1.3881540442467516e-07, "logits/chosen": -3.151397228240967, "logits/rejected": -3.3218812942504883, "logps/chosen": -310.5677490234375, "logps/rejected": -293.6673278808594, "loss": 0.2507, "rewards/accuracies": 0.875, "rewards/chosen": 0.448203444480896, "rewards/margins": 2.0137596130371094, "rewards/rejected": -1.5655561685562134, "step": 4721 }, { "epoch": 0.54, "learning_rate": 1.3878028795505091e-07, "logits/chosen": -3.7744526863098145, "logits/rejected": -3.5349133014678955, "logps/chosen": -197.65277099609375, "logps/rejected": -192.4982147216797, "loss": 0.4295, "rewards/accuracies": 0.875, "rewards/chosen": 0.06469686329364777, "rewards/margins": 1.572541356086731, "rewards/rejected": -1.5078444480895996, "step": 4722 }, { "epoch": 0.54, "learning_rate": 1.3874517148542667e-07, "logits/chosen": -3.4163730144500732, "logits/rejected": -3.4043445587158203, "logps/chosen": -286.1738586425781, "logps/rejected": -236.41534423828125, "loss": 0.2187, "rewards/accuracies": 0.875, "rewards/chosen": -0.23494893312454224, "rewards/margins": 2.4291608333587646, "rewards/rejected": -2.664109706878662, "step": 4723 }, { "epoch": 0.54, "learning_rate": 1.387100550158024e-07, "logits/chosen": -2.988279342651367, "logits/rejected": -2.858722686767578, "logps/chosen": -286.3377380371094, "logps/rejected": -289.56256103515625, "loss": 0.2929, "rewards/accuracies": 0.875, "rewards/chosen": 0.24002760648727417, "rewards/margins": 1.9144186973571777, "rewards/rejected": -1.6743909120559692, "step": 4724 }, { "epoch": 0.54, "learning_rate": 1.3867493854617815e-07, "logits/chosen": -2.806779146194458, "logits/rejected": -3.0547685623168945, "logps/chosen": -171.98867797851562, "logps/rejected": -144.65679931640625, "loss": 0.4965, "rewards/accuracies": 0.625, "rewards/chosen": -0.030558064579963684, "rewards/margins": 1.1799174547195435, "rewards/rejected": -1.2104755640029907, "step": 4725 }, { "epoch": 0.54, "learning_rate": 1.386398220765539e-07, "logits/chosen": -2.7373886108398438, "logits/rejected": -2.7626864910125732, "logps/chosen": -394.13482666015625, "logps/rejected": -245.0577392578125, "loss": 0.3135, "rewards/accuracies": 0.875, "rewards/chosen": -0.5343860387802124, "rewards/margins": 1.542799949645996, "rewards/rejected": -2.077185869216919, "step": 4726 }, { "epoch": 0.54, "learning_rate": 1.3860470560692966e-07, "logits/chosen": -3.1168816089630127, "logits/rejected": -3.2346205711364746, "logps/chosen": -218.20220947265625, "logps/rejected": -306.5650939941406, "loss": 0.1624, "rewards/accuracies": 1.0, "rewards/chosen": 0.4989706575870514, "rewards/margins": 2.6829659938812256, "rewards/rejected": -2.183995246887207, "step": 4727 }, { "epoch": 0.55, "learning_rate": 1.3856958913730538e-07, "logits/chosen": -2.559622049331665, "logits/rejected": -2.4015250205993652, "logps/chosen": -172.04736328125, "logps/rejected": -179.10598754882812, "loss": 0.6075, "rewards/accuracies": 0.625, "rewards/chosen": -0.4799600839614868, "rewards/margins": 1.6612842082977295, "rewards/rejected": -2.1412441730499268, "step": 4728 }, { "epoch": 0.55, "learning_rate": 1.3853447266768114e-07, "logits/chosen": -2.6120941638946533, "logits/rejected": -2.642918825149536, "logps/chosen": -194.12249755859375, "logps/rejected": -137.72784423828125, "loss": 0.5036, "rewards/accuracies": 0.625, "rewards/chosen": -0.23825594782829285, "rewards/margins": 0.6579334139823914, "rewards/rejected": -0.8961893320083618, "step": 4729 }, { "epoch": 0.55, "learning_rate": 1.384993561980569e-07, "logits/chosen": -2.778114080429077, "logits/rejected": -2.629110097885132, "logps/chosen": -292.0668640136719, "logps/rejected": -266.2926330566406, "loss": 0.2706, "rewards/accuracies": 1.0, "rewards/chosen": -0.33000218868255615, "rewards/margins": 1.7206790447235107, "rewards/rejected": -2.0506811141967773, "step": 4730 }, { "epoch": 0.55, "learning_rate": 1.3846423972843262e-07, "logits/chosen": -3.38948392868042, "logits/rejected": -3.453326940536499, "logps/chosen": -505.832275390625, "logps/rejected": -402.2357177734375, "loss": 0.1569, "rewards/accuracies": 1.0, "rewards/chosen": -0.3381009101867676, "rewards/margins": 2.5125632286071777, "rewards/rejected": -2.850663900375366, "step": 4731 }, { "epoch": 0.55, "learning_rate": 1.3842912325880837e-07, "logits/chosen": -2.299744129180908, "logits/rejected": -2.183795690536499, "logps/chosen": -260.35626220703125, "logps/rejected": -276.78289794921875, "loss": 0.6366, "rewards/accuracies": 0.875, "rewards/chosen": -0.12211322784423828, "rewards/margins": 0.9472222328186035, "rewards/rejected": -1.0693355798721313, "step": 4732 }, { "epoch": 0.55, "learning_rate": 1.3839400678918413e-07, "logits/chosen": -3.7000043392181396, "logits/rejected": -3.2421011924743652, "logps/chosen": -153.04244995117188, "logps/rejected": -228.89523315429688, "loss": 0.4069, "rewards/accuracies": 0.75, "rewards/chosen": -0.19727803766727448, "rewards/margins": 1.2480695247650146, "rewards/rejected": -1.445347547531128, "step": 4733 }, { "epoch": 0.55, "learning_rate": 1.3835889031955988e-07, "logits/chosen": -2.833785057067871, "logits/rejected": -2.817981719970703, "logps/chosen": -289.9038391113281, "logps/rejected": -200.1741180419922, "loss": 0.6666, "rewards/accuracies": 0.5, "rewards/chosen": 0.36155134439468384, "rewards/margins": 0.31675493717193604, "rewards/rejected": 0.04479638487100601, "step": 4734 }, { "epoch": 0.55, "learning_rate": 1.383237738499356e-07, "logits/chosen": -3.6494758129119873, "logits/rejected": -3.26887845993042, "logps/chosen": -315.0583801269531, "logps/rejected": -188.7802734375, "loss": 0.3183, "rewards/accuracies": 0.875, "rewards/chosen": -0.35809722542762756, "rewards/margins": 1.6324986219406128, "rewards/rejected": -1.9905959367752075, "step": 4735 }, { "epoch": 0.55, "learning_rate": 1.3828865738031136e-07, "logits/chosen": -2.348493814468384, "logits/rejected": -2.388981342315674, "logps/chosen": -278.9490051269531, "logps/rejected": -335.938232421875, "loss": 0.2205, "rewards/accuracies": 0.875, "rewards/chosen": 0.5573590993881226, "rewards/margins": 2.697774887084961, "rewards/rejected": -2.140415668487549, "step": 4736 }, { "epoch": 0.55, "learning_rate": 1.382535409106871e-07, "logits/chosen": -2.520266056060791, "logits/rejected": -2.6790950298309326, "logps/chosen": -131.79978942871094, "logps/rejected": -258.58502197265625, "loss": 0.5491, "rewards/accuracies": 0.625, "rewards/chosen": 0.06394777446985245, "rewards/margins": 1.9960215091705322, "rewards/rejected": -1.932073712348938, "step": 4737 }, { "epoch": 0.55, "learning_rate": 1.3821842444106287e-07, "logits/chosen": -3.3347573280334473, "logits/rejected": -3.3170440196990967, "logps/chosen": -132.14028930664062, "logps/rejected": -283.2257080078125, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": 0.06014992296695709, "rewards/margins": 3.2007343769073486, "rewards/rejected": -3.1405844688415527, "step": 4738 }, { "epoch": 0.55, "learning_rate": 1.381833079714386e-07, "logits/chosen": -3.2767345905303955, "logits/rejected": -3.1271183490753174, "logps/chosen": -140.1322784423828, "logps/rejected": -264.0682373046875, "loss": 0.345, "rewards/accuracies": 0.875, "rewards/chosen": -0.08590898662805557, "rewards/margins": 2.190504550933838, "rewards/rejected": -2.2764134407043457, "step": 4739 }, { "epoch": 0.55, "learning_rate": 1.3814819150181435e-07, "logits/chosen": -3.019453763961792, "logits/rejected": -2.7211737632751465, "logps/chosen": -335.6041259765625, "logps/rejected": -306.0028991699219, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": 0.35958099365234375, "rewards/margins": 2.7053897380828857, "rewards/rejected": -2.345808982849121, "step": 4740 }, { "epoch": 0.55, "learning_rate": 1.3811307503219008e-07, "logits/chosen": -3.5259194374084473, "logits/rejected": -3.7692911624908447, "logps/chosen": -68.09434509277344, "logps/rejected": -232.1933135986328, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/chosen": 0.20137208700180054, "rewards/margins": 3.4941813945770264, "rewards/rejected": -3.29280948638916, "step": 4741 }, { "epoch": 0.55, "learning_rate": 1.3807795856256583e-07, "logits/chosen": -3.0738956928253174, "logits/rejected": -2.8246266841888428, "logps/chosen": -422.4683837890625, "logps/rejected": -364.1755676269531, "loss": 0.5965, "rewards/accuracies": 0.625, "rewards/chosen": 0.08188116550445557, "rewards/margins": 1.6663875579833984, "rewards/rejected": -1.5845062732696533, "step": 4742 }, { "epoch": 0.55, "learning_rate": 1.3804284209294159e-07, "logits/chosen": -3.40942120552063, "logits/rejected": -3.2601516246795654, "logps/chosen": -284.62567138671875, "logps/rejected": -340.7852478027344, "loss": 0.5613, "rewards/accuracies": 0.625, "rewards/chosen": 0.23790070414543152, "rewards/margins": 0.9650171995162964, "rewards/rejected": -0.7271165251731873, "step": 4743 }, { "epoch": 0.55, "learning_rate": 1.3800772562331734e-07, "logits/chosen": -3.602189302444458, "logits/rejected": -3.447265625, "logps/chosen": -302.26953125, "logps/rejected": -283.555908203125, "loss": 0.5365, "rewards/accuracies": 0.625, "rewards/chosen": 0.07298246771097183, "rewards/margins": 1.5904346704483032, "rewards/rejected": -1.5174522399902344, "step": 4744 }, { "epoch": 0.55, "learning_rate": 1.3797260915369307e-07, "logits/chosen": -3.6791839599609375, "logits/rejected": -3.621366500854492, "logps/chosen": -240.1277313232422, "logps/rejected": -209.745361328125, "loss": 0.7993, "rewards/accuracies": 0.5, "rewards/chosen": -0.3555561602115631, "rewards/margins": 0.8271989822387695, "rewards/rejected": -1.1827552318572998, "step": 4745 }, { "epoch": 0.55, "learning_rate": 1.3793749268406882e-07, "logits/chosen": -2.9438323974609375, "logits/rejected": -3.0170044898986816, "logps/chosen": -300.80706787109375, "logps/rejected": -386.40228271484375, "loss": 0.1835, "rewards/accuracies": 1.0, "rewards/chosen": -0.25196754932403564, "rewards/margins": 2.5038819313049316, "rewards/rejected": -2.755849838256836, "step": 4746 }, { "epoch": 0.55, "learning_rate": 1.3790237621444458e-07, "logits/chosen": -3.1970205307006836, "logits/rejected": -3.5423922538757324, "logps/chosen": -239.9945068359375, "logps/rejected": -229.82025146484375, "loss": 0.311, "rewards/accuracies": 0.875, "rewards/chosen": 0.3635498285293579, "rewards/margins": 2.9292171001434326, "rewards/rejected": -2.5656676292419434, "step": 4747 }, { "epoch": 0.55, "learning_rate": 1.378672597448203e-07, "logits/chosen": -2.9111506938934326, "logits/rejected": -3.1504087448120117, "logps/chosen": -221.18809509277344, "logps/rejected": -253.2954864501953, "loss": 0.2859, "rewards/accuracies": 0.875, "rewards/chosen": 0.5290796756744385, "rewards/margins": 2.065487861633301, "rewards/rejected": -1.5364079475402832, "step": 4748 }, { "epoch": 0.55, "learning_rate": 1.3783214327519606e-07, "logits/chosen": -2.6343464851379395, "logits/rejected": -2.4619016647338867, "logps/chosen": -341.64337158203125, "logps/rejected": -333.05474853515625, "loss": 0.2774, "rewards/accuracies": 0.875, "rewards/chosen": -0.21963489055633545, "rewards/margins": 2.0280959606170654, "rewards/rejected": -2.2477309703826904, "step": 4749 }, { "epoch": 0.55, "learning_rate": 1.377970268055718e-07, "logits/chosen": -3.430717945098877, "logits/rejected": -3.4509775638580322, "logps/chosen": -120.05661010742188, "logps/rejected": -216.53517150878906, "loss": 0.221, "rewards/accuracies": 1.0, "rewards/chosen": 0.34520742297172546, "rewards/margins": 2.9100663661956787, "rewards/rejected": -2.56485915184021, "step": 4750 }, { "epoch": 0.55, "learning_rate": 1.3776191033594756e-07, "logits/chosen": -2.9677910804748535, "logits/rejected": -3.0517513751983643, "logps/chosen": -308.7592468261719, "logps/rejected": -257.6866760253906, "loss": 0.3783, "rewards/accuracies": 0.75, "rewards/chosen": -0.4154060184955597, "rewards/margins": 2.0111756324768066, "rewards/rejected": -2.426581382751465, "step": 4751 }, { "epoch": 0.55, "learning_rate": 1.377267938663233e-07, "logits/chosen": -3.241328716278076, "logits/rejected": -3.1433277130126953, "logps/chosen": -392.1812438964844, "logps/rejected": -280.78363037109375, "loss": 0.7511, "rewards/accuracies": 0.5, "rewards/chosen": 0.017477944493293762, "rewards/margins": 0.6029520034790039, "rewards/rejected": -0.5854740142822266, "step": 4752 }, { "epoch": 0.55, "learning_rate": 1.3769167739669905e-07, "logits/chosen": -3.722938299179077, "logits/rejected": -3.385831594467163, "logps/chosen": -303.8262634277344, "logps/rejected": -239.1048583984375, "loss": 0.4689, "rewards/accuracies": 0.75, "rewards/chosen": -0.04275143891572952, "rewards/margins": 2.1554694175720215, "rewards/rejected": -2.198220729827881, "step": 4753 }, { "epoch": 0.55, "learning_rate": 1.376565609270748e-07, "logits/chosen": -2.7159626483917236, "logits/rejected": -2.6557090282440186, "logps/chosen": -336.16033935546875, "logps/rejected": -331.54425048828125, "loss": 0.2238, "rewards/accuracies": 0.875, "rewards/chosen": 0.28443628549575806, "rewards/margins": 2.945359230041504, "rewards/rejected": -2.6609230041503906, "step": 4754 }, { "epoch": 0.55, "learning_rate": 1.3762144445745055e-07, "logits/chosen": -3.290484666824341, "logits/rejected": -3.150261878967285, "logps/chosen": -211.47274780273438, "logps/rejected": -266.88946533203125, "loss": 0.4109, "rewards/accuracies": 0.625, "rewards/chosen": 0.14907371997833252, "rewards/margins": 2.3213891983032227, "rewards/rejected": -2.172315835952759, "step": 4755 }, { "epoch": 0.55, "learning_rate": 1.3758632798782628e-07, "logits/chosen": -3.523017168045044, "logits/rejected": -3.6178290843963623, "logps/chosen": -246.3368682861328, "logps/rejected": -326.0635986328125, "loss": 0.3711, "rewards/accuracies": 0.75, "rewards/chosen": 0.16913747787475586, "rewards/margins": 1.975386142730713, "rewards/rejected": -1.8062485456466675, "step": 4756 }, { "epoch": 0.55, "learning_rate": 1.3755121151820203e-07, "logits/chosen": -3.230670928955078, "logits/rejected": -2.73875093460083, "logps/chosen": -300.72222900390625, "logps/rejected": -249.3724365234375, "loss": 0.2861, "rewards/accuracies": 1.0, "rewards/chosen": -0.5103797316551208, "rewards/margins": 1.8377691507339478, "rewards/rejected": -2.348148822784424, "step": 4757 }, { "epoch": 0.55, "learning_rate": 1.375160950485778e-07, "logits/chosen": -3.124053478240967, "logits/rejected": -2.880309581756592, "logps/chosen": -278.70562744140625, "logps/rejected": -325.6264953613281, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": 0.24388253688812256, "rewards/margins": 1.9178682565689087, "rewards/rejected": -1.6739857196807861, "step": 4758 }, { "epoch": 0.55, "learning_rate": 1.3748097857895352e-07, "logits/chosen": -3.0835227966308594, "logits/rejected": -3.179074764251709, "logps/chosen": -315.98004150390625, "logps/rejected": -341.903564453125, "loss": 0.297, "rewards/accuracies": 0.75, "rewards/chosen": 0.21314242482185364, "rewards/margins": 2.545158624649048, "rewards/rejected": -2.3320164680480957, "step": 4759 }, { "epoch": 0.55, "learning_rate": 1.3744586210932927e-07, "logits/chosen": -2.894813299179077, "logits/rejected": -3.2617759704589844, "logps/chosen": -310.975830078125, "logps/rejected": -162.55955505371094, "loss": 0.5817, "rewards/accuracies": 0.75, "rewards/chosen": -0.7916417121887207, "rewards/margins": 1.0256259441375732, "rewards/rejected": -1.8172677755355835, "step": 4760 }, { "epoch": 0.55, "learning_rate": 1.3741074563970502e-07, "logits/chosen": -2.90720796585083, "logits/rejected": -2.8846826553344727, "logps/chosen": -256.6512756347656, "logps/rejected": -121.60044860839844, "loss": 0.4394, "rewards/accuracies": 0.875, "rewards/chosen": 0.22586476802825928, "rewards/margins": 1.1595749855041504, "rewards/rejected": -0.9337102770805359, "step": 4761 }, { "epoch": 0.55, "learning_rate": 1.3737562917008075e-07, "logits/chosen": -2.7786357402801514, "logits/rejected": -2.803652286529541, "logps/chosen": -358.59515380859375, "logps/rejected": -394.73077392578125, "loss": 0.2176, "rewards/accuracies": 0.875, "rewards/chosen": -0.06906656175851822, "rewards/margins": 1.9555354118347168, "rewards/rejected": -2.024601936340332, "step": 4762 }, { "epoch": 0.55, "learning_rate": 1.373405127004565e-07, "logits/chosen": -2.944713592529297, "logits/rejected": -3.0749309062957764, "logps/chosen": -217.2674560546875, "logps/rejected": -182.97012329101562, "loss": 0.3266, "rewards/accuracies": 0.75, "rewards/chosen": 0.16660384833812714, "rewards/margins": 1.3222711086273193, "rewards/rejected": -1.1556673049926758, "step": 4763 }, { "epoch": 0.55, "learning_rate": 1.3730539623083226e-07, "logits/chosen": -3.0557074546813965, "logits/rejected": -2.946401357650757, "logps/chosen": -163.19021606445312, "logps/rejected": -240.97003173828125, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": 0.05854501202702522, "rewards/margins": 1.8657103776931763, "rewards/rejected": -1.8071653842926025, "step": 4764 }, { "epoch": 0.55, "learning_rate": 1.3727027976120799e-07, "logits/chosen": -3.425889015197754, "logits/rejected": -3.277805805206299, "logps/chosen": -373.53753662109375, "logps/rejected": -452.89080810546875, "loss": 0.648, "rewards/accuracies": 0.75, "rewards/chosen": -0.5641120672225952, "rewards/margins": 2.3204426765441895, "rewards/rejected": -2.884554386138916, "step": 4765 }, { "epoch": 0.55, "learning_rate": 1.3723516329158374e-07, "logits/chosen": -3.17521595954895, "logits/rejected": -3.098491907119751, "logps/chosen": -232.3490447998047, "logps/rejected": -283.936767578125, "loss": 0.7542, "rewards/accuracies": 0.5, "rewards/chosen": -0.38145989179611206, "rewards/margins": 1.3110957145690918, "rewards/rejected": -1.692555546760559, "step": 4766 }, { "epoch": 0.55, "learning_rate": 1.372000468219595e-07, "logits/chosen": -3.1587986946105957, "logits/rejected": -3.6165928840637207, "logps/chosen": -206.18096923828125, "logps/rejected": -216.03515625, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": 0.7203680872917175, "rewards/margins": 2.8779284954071045, "rewards/rejected": -2.157560348510742, "step": 4767 }, { "epoch": 0.55, "learning_rate": 1.3716493035233525e-07, "logits/chosen": -2.966845989227295, "logits/rejected": -3.5483169555664062, "logps/chosen": -188.53749084472656, "logps/rejected": -287.4612121582031, "loss": 0.3192, "rewards/accuracies": 0.875, "rewards/chosen": 0.24445034563541412, "rewards/margins": 2.1717090606689453, "rewards/rejected": -1.9272587299346924, "step": 4768 }, { "epoch": 0.55, "learning_rate": 1.3712981388271098e-07, "logits/chosen": -2.7689146995544434, "logits/rejected": -2.6971542835235596, "logps/chosen": -349.046875, "logps/rejected": -344.5328369140625, "loss": 0.1835, "rewards/accuracies": 0.875, "rewards/chosen": 0.22790539264678955, "rewards/margins": 2.462186813354492, "rewards/rejected": -2.234281539916992, "step": 4769 }, { "epoch": 0.55, "learning_rate": 1.3709469741308673e-07, "logits/chosen": -2.5629732608795166, "logits/rejected": -2.927661657333374, "logps/chosen": -299.96820068359375, "logps/rejected": -283.882080078125, "loss": 0.4321, "rewards/accuracies": 0.75, "rewards/chosen": -0.020544201135635376, "rewards/margins": 2.650297164916992, "rewards/rejected": -2.670841693878174, "step": 4770 }, { "epoch": 0.55, "learning_rate": 1.3705958094346248e-07, "logits/chosen": -3.1924710273742676, "logits/rejected": -3.0200133323669434, "logps/chosen": -381.2283935546875, "logps/rejected": -180.1150360107422, "loss": 0.5874, "rewards/accuracies": 0.75, "rewards/chosen": -0.5874235033988953, "rewards/margins": 0.46681854128837585, "rewards/rejected": -1.0542420148849487, "step": 4771 }, { "epoch": 0.55, "learning_rate": 1.3702446447383824e-07, "logits/chosen": -3.7137227058410645, "logits/rejected": -3.7481746673583984, "logps/chosen": -370.99176025390625, "logps/rejected": -345.04376220703125, "loss": 0.4851, "rewards/accuracies": 0.625, "rewards/chosen": -0.16713115572929382, "rewards/margins": 1.2075865268707275, "rewards/rejected": -1.3747177124023438, "step": 4772 }, { "epoch": 0.55, "learning_rate": 1.3698934800421396e-07, "logits/chosen": -3.4891738891601562, "logits/rejected": -3.478107452392578, "logps/chosen": -222.07122802734375, "logps/rejected": -177.7850341796875, "loss": 0.6224, "rewards/accuracies": 0.75, "rewards/chosen": -0.508493185043335, "rewards/margins": 0.9051705598831177, "rewards/rejected": -1.4136638641357422, "step": 4773 }, { "epoch": 0.55, "learning_rate": 1.3695423153458972e-07, "logits/chosen": -3.403113603591919, "logits/rejected": -3.3659071922302246, "logps/chosen": -159.90444946289062, "logps/rejected": -163.7713165283203, "loss": 0.5965, "rewards/accuracies": 0.75, "rewards/chosen": -0.07910767197608948, "rewards/margins": 1.87180757522583, "rewards/rejected": -1.9509150981903076, "step": 4774 }, { "epoch": 0.55, "learning_rate": 1.3691911506496547e-07, "logits/chosen": -3.2375857830047607, "logits/rejected": -3.3286445140838623, "logps/chosen": -203.01637268066406, "logps/rejected": -386.72662353515625, "loss": 0.4089, "rewards/accuracies": 0.75, "rewards/chosen": -0.06815581023693085, "rewards/margins": 2.1461100578308105, "rewards/rejected": -2.214265823364258, "step": 4775 }, { "epoch": 0.55, "learning_rate": 1.368839985953412e-07, "logits/chosen": -3.6438465118408203, "logits/rejected": -3.6152353286743164, "logps/chosen": -254.95199584960938, "logps/rejected": -254.06752014160156, "loss": 0.4163, "rewards/accuracies": 0.875, "rewards/chosen": 0.4058321416378021, "rewards/margins": 1.363391399383545, "rewards/rejected": -0.9575592279434204, "step": 4776 }, { "epoch": 0.55, "learning_rate": 1.3684888212571695e-07, "logits/chosen": -3.0051753520965576, "logits/rejected": -3.066567897796631, "logps/chosen": -356.7967224121094, "logps/rejected": -275.3105773925781, "loss": 0.3912, "rewards/accuracies": 0.75, "rewards/chosen": -0.23509468138217926, "rewards/margins": 1.6956194639205933, "rewards/rejected": -1.9307141304016113, "step": 4777 }, { "epoch": 0.55, "learning_rate": 1.368137656560927e-07, "logits/chosen": -2.690336227416992, "logits/rejected": -2.590217113494873, "logps/chosen": -583.7532348632812, "logps/rejected": -191.1849365234375, "loss": 0.6864, "rewards/accuracies": 0.625, "rewards/chosen": 0.2811686396598816, "rewards/margins": 1.1447964906692505, "rewards/rejected": -0.8636279106140137, "step": 4778 }, { "epoch": 0.55, "learning_rate": 1.3677864918646846e-07, "logits/chosen": -3.4166295528411865, "logits/rejected": -3.526822090148926, "logps/chosen": -225.54876708984375, "logps/rejected": -283.5772705078125, "loss": 0.3007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2571033537387848, "rewards/margins": 1.8120750188827515, "rewards/rejected": -1.5549718141555786, "step": 4779 }, { "epoch": 0.55, "learning_rate": 1.367435327168442e-07, "logits/chosen": -2.805954694747925, "logits/rejected": -2.9064149856567383, "logps/chosen": -293.5096130371094, "logps/rejected": -330.1612854003906, "loss": 0.4177, "rewards/accuracies": 0.625, "rewards/chosen": 0.08531782776117325, "rewards/margins": 1.5060094594955444, "rewards/rejected": -1.420691728591919, "step": 4780 }, { "epoch": 0.55, "learning_rate": 1.3670841624721994e-07, "logits/chosen": -3.5663695335388184, "logits/rejected": -3.2378907203674316, "logps/chosen": -232.62371826171875, "logps/rejected": -323.1640625, "loss": 0.2934, "rewards/accuracies": 0.875, "rewards/chosen": -0.06929884850978851, "rewards/margins": 3.327594757080078, "rewards/rejected": -3.3968937397003174, "step": 4781 }, { "epoch": 0.55, "learning_rate": 1.3667329977759567e-07, "logits/chosen": -2.9413044452667236, "logits/rejected": -2.8289926052093506, "logps/chosen": -270.1614685058594, "logps/rejected": -228.17552185058594, "loss": 0.6262, "rewards/accuracies": 0.75, "rewards/chosen": -0.4310479462146759, "rewards/margins": 0.7258161306381226, "rewards/rejected": -1.1568641662597656, "step": 4782 }, { "epoch": 0.55, "learning_rate": 1.3663818330797145e-07, "logits/chosen": -2.992344856262207, "logits/rejected": -3.3419852256774902, "logps/chosen": -137.5279541015625, "logps/rejected": -292.6808166503906, "loss": 0.5452, "rewards/accuracies": 0.625, "rewards/chosen": -0.581852912902832, "rewards/margins": 1.3821008205413818, "rewards/rejected": -1.9639538526535034, "step": 4783 }, { "epoch": 0.55, "learning_rate": 1.3660306683834718e-07, "logits/chosen": -4.064541816711426, "logits/rejected": -3.7459912300109863, "logps/chosen": -305.4259033203125, "logps/rejected": -347.2422790527344, "loss": 0.5234, "rewards/accuracies": 0.625, "rewards/chosen": -0.24908877909183502, "rewards/margins": 1.738128423690796, "rewards/rejected": -1.9872173070907593, "step": 4784 }, { "epoch": 0.55, "learning_rate": 1.3656795036872293e-07, "logits/chosen": -3.4650232791900635, "logits/rejected": -3.0478177070617676, "logps/chosen": -251.04412841796875, "logps/rejected": -149.64674377441406, "loss": 0.1965, "rewards/accuracies": 1.0, "rewards/chosen": 0.14485712349414825, "rewards/margins": 1.8390424251556396, "rewards/rejected": -1.6941853761672974, "step": 4785 }, { "epoch": 0.55, "learning_rate": 1.3653283389909866e-07, "logits/chosen": -3.394439697265625, "logits/rejected": -3.5003890991210938, "logps/chosen": -104.1009521484375, "logps/rejected": -211.92538452148438, "loss": 0.2541, "rewards/accuracies": 1.0, "rewards/chosen": 0.38176268339157104, "rewards/margins": 2.1907334327697754, "rewards/rejected": -1.8089708089828491, "step": 4786 }, { "epoch": 0.55, "learning_rate": 1.364977174294744e-07, "logits/chosen": -3.2622852325439453, "logits/rejected": -3.328895092010498, "logps/chosen": -299.6871032714844, "logps/rejected": -289.53302001953125, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": 0.3952397108078003, "rewards/margins": 2.314263343811035, "rewards/rejected": -1.9190236330032349, "step": 4787 }, { "epoch": 0.55, "learning_rate": 1.3646260095985017e-07, "logits/chosen": -3.513597011566162, "logits/rejected": -3.5034642219543457, "logps/chosen": -161.7049560546875, "logps/rejected": -216.4225311279297, "loss": 0.4203, "rewards/accuracies": 0.875, "rewards/chosen": -0.2205355167388916, "rewards/margins": 2.2276575565338135, "rewards/rejected": -2.448193073272705, "step": 4788 }, { "epoch": 0.55, "learning_rate": 1.3642748449022592e-07, "logits/chosen": -2.6898934841156006, "logits/rejected": -2.591270923614502, "logps/chosen": -329.4867248535156, "logps/rejected": -391.5140686035156, "loss": 0.4408, "rewards/accuracies": 0.75, "rewards/chosen": 0.5852369070053101, "rewards/margins": 2.055316925048828, "rewards/rejected": -1.470080018043518, "step": 4789 }, { "epoch": 0.55, "learning_rate": 1.3639236802060165e-07, "logits/chosen": -2.629234552383423, "logits/rejected": -2.688204050064087, "logps/chosen": -154.25674438476562, "logps/rejected": -232.88870239257812, "loss": 0.1959, "rewards/accuracies": 0.875, "rewards/chosen": 0.2423698902130127, "rewards/margins": 3.418701648712158, "rewards/rejected": -3.1763315200805664, "step": 4790 }, { "epoch": 0.55, "learning_rate": 1.363572515509774e-07, "logits/chosen": -2.697660207748413, "logits/rejected": -2.5636579990386963, "logps/chosen": -274.52862548828125, "logps/rejected": -384.6920166015625, "loss": 0.3056, "rewards/accuracies": 0.875, "rewards/chosen": 0.23290996253490448, "rewards/margins": 2.1088058948516846, "rewards/rejected": -1.8758959770202637, "step": 4791 }, { "epoch": 0.55, "learning_rate": 1.3632213508135316e-07, "logits/chosen": -2.831786632537842, "logits/rejected": -2.716884136199951, "logps/chosen": -414.04925537109375, "logps/rejected": -347.8641052246094, "loss": 0.3906, "rewards/accuracies": 0.75, "rewards/chosen": -0.39507362246513367, "rewards/margins": 2.0783607959747314, "rewards/rejected": -2.4734344482421875, "step": 4792 }, { "epoch": 0.55, "learning_rate": 1.3628701861172888e-07, "logits/chosen": -3.36822509765625, "logits/rejected": -3.4875707626342773, "logps/chosen": -313.02471923828125, "logps/rejected": -353.93646240234375, "loss": 0.5727, "rewards/accuracies": 0.625, "rewards/chosen": -0.390035480260849, "rewards/margins": 0.4695722162723541, "rewards/rejected": -0.8596076369285583, "step": 4793 }, { "epoch": 0.55, "learning_rate": 1.3625190214210464e-07, "logits/chosen": -2.6339492797851562, "logits/rejected": -2.8939030170440674, "logps/chosen": -257.9361572265625, "logps/rejected": -298.8621826171875, "loss": 0.3541, "rewards/accuracies": 0.875, "rewards/chosen": -0.20023319125175476, "rewards/margins": 2.6396138668060303, "rewards/rejected": -2.8398468494415283, "step": 4794 }, { "epoch": 0.55, "learning_rate": 1.362167856724804e-07, "logits/chosen": -2.952746629714966, "logits/rejected": -2.8004980087280273, "logps/chosen": -309.64385986328125, "logps/rejected": -275.4190368652344, "loss": 0.8406, "rewards/accuracies": 0.5, "rewards/chosen": -0.6241786479949951, "rewards/margins": 0.6050137281417847, "rewards/rejected": -1.2291922569274902, "step": 4795 }, { "epoch": 0.55, "learning_rate": 1.3618166920285614e-07, "logits/chosen": -3.093174934387207, "logits/rejected": -3.395916223526001, "logps/chosen": -359.6121826171875, "logps/rejected": -322.474853515625, "loss": 0.3001, "rewards/accuracies": 0.875, "rewards/chosen": 0.11656445264816284, "rewards/margins": 1.5859254598617554, "rewards/rejected": -1.4693608283996582, "step": 4796 }, { "epoch": 0.55, "learning_rate": 1.3614655273323187e-07, "logits/chosen": -2.72330904006958, "logits/rejected": -2.7322347164154053, "logps/chosen": -386.3936767578125, "logps/rejected": -264.81646728515625, "loss": 0.3924, "rewards/accuracies": 0.75, "rewards/chosen": -0.35599571466445923, "rewards/margins": 1.1342438459396362, "rewards/rejected": -1.4902396202087402, "step": 4797 }, { "epoch": 0.55, "learning_rate": 1.3611143626360763e-07, "logits/chosen": -3.757974147796631, "logits/rejected": -3.329442262649536, "logps/chosen": -289.66925048828125, "logps/rejected": -261.1196594238281, "loss": 0.7946, "rewards/accuracies": 0.625, "rewards/chosen": 0.0787913054227829, "rewards/margins": 0.4578053057193756, "rewards/rejected": -0.3790140450000763, "step": 4798 }, { "epoch": 0.55, "learning_rate": 1.3607631979398338e-07, "logits/chosen": -3.269937038421631, "logits/rejected": -3.1520469188690186, "logps/chosen": -308.7500915527344, "logps/rejected": -334.4271240234375, "loss": 0.144, "rewards/accuracies": 1.0, "rewards/chosen": -0.10839928686618805, "rewards/margins": 2.4948372840881348, "rewards/rejected": -2.603236436843872, "step": 4799 }, { "epoch": 0.55, "learning_rate": 1.3604120332435913e-07, "logits/chosen": -2.708425760269165, "logits/rejected": -2.9537200927734375, "logps/chosen": -199.80999755859375, "logps/rejected": -256.37347412109375, "loss": 0.2505, "rewards/accuracies": 0.875, "rewards/chosen": 0.7223774194717407, "rewards/margins": 2.809574842453003, "rewards/rejected": -2.0871973037719727, "step": 4800 }, { "epoch": 0.55, "learning_rate": 1.3600608685473486e-07, "logits/chosen": -3.7470955848693848, "logits/rejected": -3.7523744106292725, "logps/chosen": -406.14764404296875, "logps/rejected": -446.1534729003906, "loss": 0.4081, "rewards/accuracies": 0.75, "rewards/chosen": 0.24842742085456848, "rewards/margins": 1.9037872552871704, "rewards/rejected": -1.6553596258163452, "step": 4801 }, { "epoch": 0.55, "learning_rate": 1.3597097038511061e-07, "logits/chosen": -3.582993984222412, "logits/rejected": -3.6742324829101562, "logps/chosen": -199.32125854492188, "logps/rejected": -169.59710693359375, "loss": 0.2085, "rewards/accuracies": 0.875, "rewards/chosen": 0.6986535787582397, "rewards/margins": 2.683745861053467, "rewards/rejected": -1.9850924015045166, "step": 4802 }, { "epoch": 0.55, "learning_rate": 1.3593585391548637e-07, "logits/chosen": -3.098726272583008, "logits/rejected": -3.030748128890991, "logps/chosen": -305.23419189453125, "logps/rejected": -259.2082824707031, "loss": 0.7404, "rewards/accuracies": 0.625, "rewards/chosen": -0.40921056270599365, "rewards/margins": 0.6607764959335327, "rewards/rejected": -1.0699870586395264, "step": 4803 }, { "epoch": 0.55, "learning_rate": 1.359007374458621e-07, "logits/chosen": -2.99583101272583, "logits/rejected": -3.0239319801330566, "logps/chosen": -183.1527099609375, "logps/rejected": -166.0958251953125, "loss": 0.3348, "rewards/accuracies": 0.875, "rewards/chosen": 0.400499165058136, "rewards/margins": 1.7980079650878906, "rewards/rejected": -1.3975088596343994, "step": 4804 }, { "epoch": 0.55, "learning_rate": 1.3586562097623785e-07, "logits/chosen": -3.8504719734191895, "logits/rejected": -3.223623752593994, "logps/chosen": -269.53009033203125, "logps/rejected": -199.30810546875, "loss": 0.2154, "rewards/accuracies": 0.875, "rewards/chosen": -0.014839313924312592, "rewards/margins": 2.951749801635742, "rewards/rejected": -2.9665892124176025, "step": 4805 }, { "epoch": 0.55, "learning_rate": 1.358305045066136e-07, "logits/chosen": -3.0472030639648438, "logits/rejected": -3.147517204284668, "logps/chosen": -164.67572021484375, "logps/rejected": -276.1445617675781, "loss": 0.5887, "rewards/accuracies": 0.75, "rewards/chosen": -0.4820007085800171, "rewards/margins": 0.7346557378768921, "rewards/rejected": -1.2166563272476196, "step": 4806 }, { "epoch": 0.55, "learning_rate": 1.3579538803698936e-07, "logits/chosen": -4.021679878234863, "logits/rejected": -4.035638809204102, "logps/chosen": -139.6142578125, "logps/rejected": -132.82315063476562, "loss": 0.3292, "rewards/accuracies": 0.875, "rewards/chosen": 0.7592330574989319, "rewards/margins": 1.7872421741485596, "rewards/rejected": -1.028009057044983, "step": 4807 }, { "epoch": 0.55, "learning_rate": 1.3576027156736508e-07, "logits/chosen": -2.7108912467956543, "logits/rejected": -2.6139345169067383, "logps/chosen": -289.5403747558594, "logps/rejected": -303.59991455078125, "loss": 0.42, "rewards/accuracies": 0.625, "rewards/chosen": 0.01578378677368164, "rewards/margins": 2.0974960327148438, "rewards/rejected": -2.081712007522583, "step": 4808 }, { "epoch": 0.55, "learning_rate": 1.3572515509774084e-07, "logits/chosen": -2.6065118312835693, "logits/rejected": -2.6107654571533203, "logps/chosen": -437.61395263671875, "logps/rejected": -373.51678466796875, "loss": 0.3496, "rewards/accuracies": 0.875, "rewards/chosen": -0.07688727974891663, "rewards/margins": 2.0648584365844727, "rewards/rejected": -2.1417455673217773, "step": 4809 }, { "epoch": 0.55, "learning_rate": 1.3569003862811657e-07, "logits/chosen": -2.8912014961242676, "logits/rejected": -2.8658504486083984, "logps/chosen": -234.30279541015625, "logps/rejected": -218.7839813232422, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": 0.7684351801872253, "rewards/margins": 2.9947919845581055, "rewards/rejected": -2.2263569831848145, "step": 4810 }, { "epoch": 0.55, "learning_rate": 1.3565492215849232e-07, "logits/chosen": -3.134061813354492, "logits/rejected": -2.8946752548217773, "logps/chosen": -199.5254364013672, "logps/rejected": -204.36962890625, "loss": 0.312, "rewards/accuracies": 1.0, "rewards/chosen": -0.5705145001411438, "rewards/margins": 1.340956449508667, "rewards/rejected": -1.911470890045166, "step": 4811 }, { "epoch": 0.55, "learning_rate": 1.3561980568886807e-07, "logits/chosen": -2.400561571121216, "logits/rejected": -2.366670608520508, "logps/chosen": -213.44029235839844, "logps/rejected": -262.0770263671875, "loss": 0.2283, "rewards/accuracies": 0.875, "rewards/chosen": -0.014767736196517944, "rewards/margins": 2.195666790008545, "rewards/rejected": -2.2104344367980957, "step": 4812 }, { "epoch": 0.55, "learning_rate": 1.3558468921924383e-07, "logits/chosen": -3.2016470432281494, "logits/rejected": -3.0882012844085693, "logps/chosen": -203.08450317382812, "logps/rejected": -283.7563781738281, "loss": 0.3346, "rewards/accuracies": 0.875, "rewards/chosen": 0.14292840659618378, "rewards/margins": 1.3093523979187012, "rewards/rejected": -1.166424036026001, "step": 4813 }, { "epoch": 0.55, "learning_rate": 1.3554957274961955e-07, "logits/chosen": -2.70068621635437, "logits/rejected": -2.8671205043792725, "logps/chosen": -267.1661071777344, "logps/rejected": -339.5758056640625, "loss": 0.1799, "rewards/accuracies": 1.0, "rewards/chosen": 0.04394865781068802, "rewards/margins": 2.448624610900879, "rewards/rejected": -2.4046759605407715, "step": 4814 }, { "epoch": 0.56, "learning_rate": 1.355144562799953e-07, "logits/chosen": -2.507544994354248, "logits/rejected": -2.3712058067321777, "logps/chosen": -115.41021728515625, "logps/rejected": -148.20950317382812, "loss": 0.492, "rewards/accuracies": 0.625, "rewards/chosen": -0.1376158595085144, "rewards/margins": 1.3625843524932861, "rewards/rejected": -1.5002002716064453, "step": 4815 }, { "epoch": 0.56, "learning_rate": 1.3547933981037106e-07, "logits/chosen": -2.4999780654907227, "logits/rejected": -2.5281057357788086, "logps/chosen": -366.2011413574219, "logps/rejected": -405.9465026855469, "loss": 0.5795, "rewards/accuracies": 0.625, "rewards/chosen": -0.1387409120798111, "rewards/margins": 0.6671519875526428, "rewards/rejected": -0.8058929443359375, "step": 4816 }, { "epoch": 0.56, "learning_rate": 1.3544422334074682e-07, "logits/chosen": -3.052475690841675, "logits/rejected": -3.20241641998291, "logps/chosen": -194.8136749267578, "logps/rejected": -205.76348876953125, "loss": 0.4408, "rewards/accuracies": 0.75, "rewards/chosen": 0.0834440365433693, "rewards/margins": 1.7555515766143799, "rewards/rejected": -1.6721075773239136, "step": 4817 }, { "epoch": 0.56, "learning_rate": 1.3540910687112254e-07, "logits/chosen": -2.832705020904541, "logits/rejected": -2.8966004848480225, "logps/chosen": -227.5538330078125, "logps/rejected": -232.8944091796875, "loss": 0.2509, "rewards/accuracies": 0.875, "rewards/chosen": 0.12707018852233887, "rewards/margins": 1.9887573719024658, "rewards/rejected": -1.861687183380127, "step": 4818 }, { "epoch": 0.56, "learning_rate": 1.353739904014983e-07, "logits/chosen": -3.3973307609558105, "logits/rejected": -3.32358455657959, "logps/chosen": -374.03277587890625, "logps/rejected": -297.51641845703125, "loss": 0.3603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2773796319961548, "rewards/margins": 1.5671623945236206, "rewards/rejected": -1.8445419073104858, "step": 4819 }, { "epoch": 0.56, "learning_rate": 1.3533887393187405e-07, "logits/chosen": -2.989832878112793, "logits/rejected": -3.230113983154297, "logps/chosen": -215.4257049560547, "logps/rejected": -155.75863647460938, "loss": 0.4069, "rewards/accuracies": 0.75, "rewards/chosen": -0.2831127643585205, "rewards/margins": 1.4341282844543457, "rewards/rejected": -1.7172410488128662, "step": 4820 }, { "epoch": 0.56, "learning_rate": 1.3530375746224978e-07, "logits/chosen": -3.1014227867126465, "logits/rejected": -3.2705137729644775, "logps/chosen": -170.6185302734375, "logps/rejected": -166.13600158691406, "loss": 0.3311, "rewards/accuracies": 0.875, "rewards/chosen": -0.20078733563423157, "rewards/margins": 1.4193609952926636, "rewards/rejected": -1.6201481819152832, "step": 4821 }, { "epoch": 0.56, "learning_rate": 1.3526864099262553e-07, "logits/chosen": -3.4650824069976807, "logits/rejected": -3.531498432159424, "logps/chosen": -370.0041809082031, "logps/rejected": -258.9075012207031, "loss": 0.2501, "rewards/accuracies": 0.875, "rewards/chosen": -0.1490071564912796, "rewards/margins": 1.9374613761901855, "rewards/rejected": -2.0864686965942383, "step": 4822 }, { "epoch": 0.56, "learning_rate": 1.3523352452300129e-07, "logits/chosen": -2.775221109390259, "logits/rejected": -2.6706061363220215, "logps/chosen": -313.1297302246094, "logps/rejected": -240.7827911376953, "loss": 0.5632, "rewards/accuracies": 0.75, "rewards/chosen": -0.27965545654296875, "rewards/margins": 1.176781415939331, "rewards/rejected": -1.4564369916915894, "step": 4823 }, { "epoch": 0.56, "learning_rate": 1.3519840805337704e-07, "logits/chosen": -3.0441746711730957, "logits/rejected": -2.8616628646850586, "logps/chosen": -184.55111694335938, "logps/rejected": -257.4252014160156, "loss": 0.499, "rewards/accuracies": 0.625, "rewards/chosen": -0.3580019176006317, "rewards/margins": 1.341821312904358, "rewards/rejected": -1.699823260307312, "step": 4824 }, { "epoch": 0.56, "learning_rate": 1.3516329158375277e-07, "logits/chosen": -3.325838088989258, "logits/rejected": -3.4475948810577393, "logps/chosen": -226.41952514648438, "logps/rejected": -215.51629638671875, "loss": 0.2754, "rewards/accuracies": 0.875, "rewards/chosen": 0.320473849773407, "rewards/margins": 2.2675461769104004, "rewards/rejected": -1.9470725059509277, "step": 4825 }, { "epoch": 0.56, "learning_rate": 1.3512817511412852e-07, "logits/chosen": -3.012838363647461, "logits/rejected": -3.366415500640869, "logps/chosen": -299.0970153808594, "logps/rejected": -248.8348388671875, "loss": 0.3798, "rewards/accuracies": 0.875, "rewards/chosen": 0.19273032248020172, "rewards/margins": 2.461395025253296, "rewards/rejected": -2.268664836883545, "step": 4826 }, { "epoch": 0.56, "learning_rate": 1.3509305864450425e-07, "logits/chosen": -3.459104061126709, "logits/rejected": -3.38262939453125, "logps/chosen": -191.81817626953125, "logps/rejected": -249.34674072265625, "loss": 0.2117, "rewards/accuracies": 1.0, "rewards/chosen": 0.3030503988265991, "rewards/margins": 2.5507140159606934, "rewards/rejected": -2.2476634979248047, "step": 4827 }, { "epoch": 0.56, "learning_rate": 1.3505794217488003e-07, "logits/chosen": -3.1061248779296875, "logits/rejected": -3.4108190536499023, "logps/chosen": -232.8472442626953, "logps/rejected": -286.937255859375, "loss": 0.3343, "rewards/accuracies": 0.75, "rewards/chosen": 0.11478567868471146, "rewards/margins": 1.961240530014038, "rewards/rejected": -1.8464550971984863, "step": 4828 }, { "epoch": 0.56, "learning_rate": 1.3502282570525576e-07, "logits/chosen": -3.045334815979004, "logits/rejected": -3.1079835891723633, "logps/chosen": -243.71426391601562, "logps/rejected": -149.33319091796875, "loss": 0.7043, "rewards/accuracies": 0.75, "rewards/chosen": -0.41339150071144104, "rewards/margins": 0.4816927909851074, "rewards/rejected": -0.8950843811035156, "step": 4829 }, { "epoch": 0.56, "learning_rate": 1.349877092356315e-07, "logits/chosen": -3.48516583442688, "logits/rejected": -3.730537176132202, "logps/chosen": -159.69068908691406, "logps/rejected": -242.7703094482422, "loss": 0.5968, "rewards/accuracies": 0.75, "rewards/chosen": -0.262426495552063, "rewards/margins": 2.048241138458252, "rewards/rejected": -2.3106675148010254, "step": 4830 }, { "epoch": 0.56, "learning_rate": 1.3495259276600724e-07, "logits/chosen": -3.784970283508301, "logits/rejected": -4.0897626876831055, "logps/chosen": -158.39793395996094, "logps/rejected": -282.8544616699219, "loss": 0.2463, "rewards/accuracies": 1.0, "rewards/chosen": -0.47611457109451294, "rewards/margins": 1.9744093418121338, "rewards/rejected": -2.450523853302002, "step": 4831 }, { "epoch": 0.56, "learning_rate": 1.34917476296383e-07, "logits/chosen": -2.930586576461792, "logits/rejected": -2.8573992252349854, "logps/chosen": -450.7336730957031, "logps/rejected": -419.904296875, "loss": 0.1355, "rewards/accuracies": 1.0, "rewards/chosen": 0.23014883697032928, "rewards/margins": 2.7520954608917236, "rewards/rejected": -2.521946430206299, "step": 4832 }, { "epoch": 0.56, "learning_rate": 1.3488235982675875e-07, "logits/chosen": -2.9913249015808105, "logits/rejected": -2.9230878353118896, "logps/chosen": -291.359619140625, "logps/rejected": -184.53817749023438, "loss": 0.5056, "rewards/accuracies": 0.625, "rewards/chosen": -0.08024879544973373, "rewards/margins": 0.7657583355903625, "rewards/rejected": -0.8460071682929993, "step": 4833 }, { "epoch": 0.56, "learning_rate": 1.348472433571345e-07, "logits/chosen": -2.9319801330566406, "logits/rejected": -3.274892568588257, "logps/chosen": -266.0690612792969, "logps/rejected": -194.55953979492188, "loss": 0.5484, "rewards/accuracies": 0.75, "rewards/chosen": -0.3379361927509308, "rewards/margins": 1.5490832328796387, "rewards/rejected": -1.887019395828247, "step": 4834 }, { "epoch": 0.56, "learning_rate": 1.3481212688751023e-07, "logits/chosen": -3.1912598609924316, "logits/rejected": -3.297440767288208, "logps/chosen": -419.15704345703125, "logps/rejected": -322.160400390625, "loss": 0.267, "rewards/accuracies": 0.875, "rewards/chosen": 0.05621958523988724, "rewards/margins": 2.4918596744537354, "rewards/rejected": -2.4356398582458496, "step": 4835 }, { "epoch": 0.56, "learning_rate": 1.3477701041788598e-07, "logits/chosen": -3.407176971435547, "logits/rejected": -3.4314703941345215, "logps/chosen": -269.7158508300781, "logps/rejected": -441.9659423828125, "loss": 0.2547, "rewards/accuracies": 0.875, "rewards/chosen": -0.028047889471054077, "rewards/margins": 1.861912727355957, "rewards/rejected": -1.889960527420044, "step": 4836 }, { "epoch": 0.56, "learning_rate": 1.3474189394826173e-07, "logits/chosen": -3.571760654449463, "logits/rejected": -3.4172940254211426, "logps/chosen": -293.99151611328125, "logps/rejected": -196.97222900390625, "loss": 0.314, "rewards/accuracies": 0.875, "rewards/chosen": 0.16517163813114166, "rewards/margins": 2.8512794971466064, "rewards/rejected": -2.686107873916626, "step": 4837 }, { "epoch": 0.56, "learning_rate": 1.3470677747863746e-07, "logits/chosen": -2.9262590408325195, "logits/rejected": -2.8587985038757324, "logps/chosen": -294.5781555175781, "logps/rejected": -357.4911193847656, "loss": 0.5016, "rewards/accuracies": 0.75, "rewards/chosen": 0.042933061718940735, "rewards/margins": 0.6989614963531494, "rewards/rejected": -0.6560283899307251, "step": 4838 }, { "epoch": 0.56, "learning_rate": 1.3467166100901322e-07, "logits/chosen": -2.828927993774414, "logits/rejected": -2.7206318378448486, "logps/chosen": -278.33807373046875, "logps/rejected": -235.42579650878906, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": 0.08209299296140671, "rewards/margins": 1.8494737148284912, "rewards/rejected": -1.767380714416504, "step": 4839 }, { "epoch": 0.56, "learning_rate": 1.3463654453938897e-07, "logits/chosen": -3.3520681858062744, "logits/rejected": -3.4561681747436523, "logps/chosen": -330.1048278808594, "logps/rejected": -300.9739990234375, "loss": 0.3267, "rewards/accuracies": 0.75, "rewards/chosen": -0.6589967608451843, "rewards/margins": 2.8070249557495117, "rewards/rejected": -3.4660215377807617, "step": 4840 }, { "epoch": 0.56, "learning_rate": 1.3460142806976472e-07, "logits/chosen": -2.923001527786255, "logits/rejected": -2.761061191558838, "logps/chosen": -194.61488342285156, "logps/rejected": -282.61181640625, "loss": 0.4844, "rewards/accuracies": 0.75, "rewards/chosen": -0.5914462804794312, "rewards/margins": 1.8088228702545166, "rewards/rejected": -2.400269031524658, "step": 4841 }, { "epoch": 0.56, "learning_rate": 1.3456631160014045e-07, "logits/chosen": -3.000657081604004, "logits/rejected": -2.9436168670654297, "logps/chosen": -364.169921875, "logps/rejected": -370.56103515625, "loss": 0.6439, "rewards/accuracies": 0.625, "rewards/chosen": -0.4509199559688568, "rewards/margins": 0.9152641296386719, "rewards/rejected": -1.3661839962005615, "step": 4842 }, { "epoch": 0.56, "learning_rate": 1.345311951305162e-07, "logits/chosen": -2.811077117919922, "logits/rejected": -2.747037410736084, "logps/chosen": -130.36419677734375, "logps/rejected": -176.325927734375, "loss": 0.43, "rewards/accuracies": 0.75, "rewards/chosen": 0.13964799046516418, "rewards/margins": 1.609838604927063, "rewards/rejected": -1.4701906442642212, "step": 4843 }, { "epoch": 0.56, "learning_rate": 1.3449607866089196e-07, "logits/chosen": -2.7604429721832275, "logits/rejected": -3.0650200843811035, "logps/chosen": -248.79454040527344, "logps/rejected": -297.4755554199219, "loss": 0.3708, "rewards/accuracies": 0.875, "rewards/chosen": -0.04954535514116287, "rewards/margins": 2.1999387741088867, "rewards/rejected": -2.2494845390319824, "step": 4844 }, { "epoch": 0.56, "learning_rate": 1.344609621912677e-07, "logits/chosen": -3.021958112716675, "logits/rejected": -3.329211950302124, "logps/chosen": -288.27117919921875, "logps/rejected": -396.48529052734375, "loss": 0.2632, "rewards/accuracies": 0.75, "rewards/chosen": 0.369141161441803, "rewards/margins": 3.005490303039551, "rewards/rejected": -2.6363492012023926, "step": 4845 }, { "epoch": 0.56, "learning_rate": 1.3442584572164344e-07, "logits/chosen": -2.798164129257202, "logits/rejected": -2.4707658290863037, "logps/chosen": -365.00738525390625, "logps/rejected": -181.79739379882812, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": -0.5706957578659058, "rewards/margins": 0.9513221979141235, "rewards/rejected": -1.5220179557800293, "step": 4846 }, { "epoch": 0.56, "learning_rate": 1.343907292520192e-07, "logits/chosen": -2.6897623538970947, "logits/rejected": -2.7125229835510254, "logps/chosen": -185.63497924804688, "logps/rejected": -251.07875061035156, "loss": 0.5566, "rewards/accuracies": 0.625, "rewards/chosen": -0.7228617668151855, "rewards/margins": 0.850774884223938, "rewards/rejected": -1.5736368894577026, "step": 4847 }, { "epoch": 0.56, "learning_rate": 1.3435561278239495e-07, "logits/chosen": -2.2424614429473877, "logits/rejected": -2.366542339324951, "logps/chosen": -296.88250732421875, "logps/rejected": -222.3978271484375, "loss": 0.3989, "rewards/accuracies": 0.75, "rewards/chosen": -0.2708043158054352, "rewards/margins": 1.9222540855407715, "rewards/rejected": -2.1930582523345947, "step": 4848 }, { "epoch": 0.56, "learning_rate": 1.3432049631277067e-07, "logits/chosen": -3.0405662059783936, "logits/rejected": -3.073359727859497, "logps/chosen": -144.9718780517578, "logps/rejected": -209.8593292236328, "loss": 0.6226, "rewards/accuracies": 0.5, "rewards/chosen": -0.43822360038757324, "rewards/margins": 0.2565137445926666, "rewards/rejected": -0.6947373747825623, "step": 4849 }, { "epoch": 0.56, "learning_rate": 1.3428537984314643e-07, "logits/chosen": -2.567556142807007, "logits/rejected": -2.6248531341552734, "logps/chosen": -267.56622314453125, "logps/rejected": -355.36273193359375, "loss": 0.4833, "rewards/accuracies": 0.625, "rewards/chosen": -0.16132131218910217, "rewards/margins": 1.9581066370010376, "rewards/rejected": -2.1194279193878174, "step": 4850 }, { "epoch": 0.56, "learning_rate": 1.3425026337352218e-07, "logits/chosen": -3.149705171585083, "logits/rejected": -3.2515146732330322, "logps/chosen": -273.7870178222656, "logps/rejected": -307.6505126953125, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": 0.2879689633846283, "rewards/margins": 2.1306846141815186, "rewards/rejected": -1.8427156209945679, "step": 4851 }, { "epoch": 0.56, "learning_rate": 1.3421514690389794e-07, "logits/chosen": -2.9041621685028076, "logits/rejected": -3.0985188484191895, "logps/chosen": -218.04209899902344, "logps/rejected": -201.47117614746094, "loss": 0.4004, "rewards/accuracies": 0.75, "rewards/chosen": -0.10240790992975235, "rewards/margins": 1.3379887342453003, "rewards/rejected": -1.4403966665267944, "step": 4852 }, { "epoch": 0.56, "learning_rate": 1.3418003043427366e-07, "logits/chosen": -2.756856679916382, "logits/rejected": -2.6693592071533203, "logps/chosen": -380.99359130859375, "logps/rejected": -552.88916015625, "loss": 0.5896, "rewards/accuracies": 0.75, "rewards/chosen": 0.13441553711891174, "rewards/margins": 1.7115068435668945, "rewards/rejected": -1.5770913362503052, "step": 4853 }, { "epoch": 0.56, "learning_rate": 1.3414491396464942e-07, "logits/chosen": -3.142477512359619, "logits/rejected": -3.204620838165283, "logps/chosen": -229.48768615722656, "logps/rejected": -340.72991943359375, "loss": 0.2358, "rewards/accuracies": 1.0, "rewards/chosen": 0.20311178267002106, "rewards/margins": 4.246486663818359, "rewards/rejected": -4.043375015258789, "step": 4854 }, { "epoch": 0.56, "learning_rate": 1.3410979749502515e-07, "logits/chosen": -3.6987924575805664, "logits/rejected": -3.6687729358673096, "logps/chosen": -381.3331604003906, "logps/rejected": -201.689453125, "loss": 0.5829, "rewards/accuracies": 0.75, "rewards/chosen": -0.32105523347854614, "rewards/margins": 1.1463972330093384, "rewards/rejected": -1.4674524068832397, "step": 4855 }, { "epoch": 0.56, "learning_rate": 1.340746810254009e-07, "logits/chosen": -2.589390277862549, "logits/rejected": -2.2709686756134033, "logps/chosen": -310.2882080078125, "logps/rejected": -344.4661865234375, "loss": 0.3742, "rewards/accuracies": 0.75, "rewards/chosen": -0.1064542829990387, "rewards/margins": 1.180747628211975, "rewards/rejected": -1.2872018814086914, "step": 4856 }, { "epoch": 0.56, "learning_rate": 1.3403956455577665e-07, "logits/chosen": -3.822092056274414, "logits/rejected": -4.0002031326293945, "logps/chosen": -239.52542114257812, "logps/rejected": -335.8203430175781, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": -0.29226887226104736, "rewards/margins": 2.665064811706543, "rewards/rejected": -2.957333564758301, "step": 4857 }, { "epoch": 0.56, "learning_rate": 1.340044480861524e-07, "logits/chosen": -2.62776780128479, "logits/rejected": -2.612459182739258, "logps/chosen": -322.82635498046875, "logps/rejected": -277.3119201660156, "loss": 0.635, "rewards/accuracies": 0.625, "rewards/chosen": 0.10354135185480118, "rewards/margins": 1.2655569314956665, "rewards/rejected": -1.162015676498413, "step": 4858 }, { "epoch": 0.56, "learning_rate": 1.3396933161652813e-07, "logits/chosen": -2.488922119140625, "logits/rejected": -2.429255723953247, "logps/chosen": -362.50244140625, "logps/rejected": -326.0660400390625, "loss": 0.2951, "rewards/accuracies": 0.875, "rewards/chosen": -0.014360696077346802, "rewards/margins": 2.083865165710449, "rewards/rejected": -2.0982260704040527, "step": 4859 }, { "epoch": 0.56, "learning_rate": 1.339342151469039e-07, "logits/chosen": -2.264695405960083, "logits/rejected": -2.3530220985412598, "logps/chosen": -238.4569091796875, "logps/rejected": -292.154052734375, "loss": 0.4651, "rewards/accuracies": 0.875, "rewards/chosen": -0.17009392380714417, "rewards/margins": 0.6949657201766968, "rewards/rejected": -0.8650596141815186, "step": 4860 }, { "epoch": 0.56, "learning_rate": 1.3389909867727964e-07, "logits/chosen": -2.9196932315826416, "logits/rejected": -3.095858097076416, "logps/chosen": -257.50958251953125, "logps/rejected": -201.7657012939453, "loss": 0.3847, "rewards/accuracies": 0.75, "rewards/chosen": 0.16587576270103455, "rewards/margins": 1.9857512712478638, "rewards/rejected": -1.8198754787445068, "step": 4861 }, { "epoch": 0.56, "learning_rate": 1.338639822076554e-07, "logits/chosen": -3.3623828887939453, "logits/rejected": -3.0689690113067627, "logps/chosen": -303.13214111328125, "logps/rejected": -155.29820251464844, "loss": 0.6533, "rewards/accuracies": 0.75, "rewards/chosen": -0.34094107151031494, "rewards/margins": 0.659579873085022, "rewards/rejected": -1.000520944595337, "step": 4862 }, { "epoch": 0.56, "learning_rate": 1.3382886573803112e-07, "logits/chosen": -3.2355270385742188, "logits/rejected": -3.4388670921325684, "logps/chosen": -162.42108154296875, "logps/rejected": -267.4080810546875, "loss": 0.3119, "rewards/accuracies": 0.75, "rewards/chosen": -0.36710667610168457, "rewards/margins": 1.5989726781845093, "rewards/rejected": -1.9660792350769043, "step": 4863 }, { "epoch": 0.56, "learning_rate": 1.3379374926840688e-07, "logits/chosen": -3.60128116607666, "logits/rejected": -3.4398837089538574, "logps/chosen": -246.8362274169922, "logps/rejected": -210.9217987060547, "loss": 0.201, "rewards/accuracies": 0.875, "rewards/chosen": 0.13883084058761597, "rewards/margins": 2.3321924209594727, "rewards/rejected": -2.193361520767212, "step": 4864 }, { "epoch": 0.56, "learning_rate": 1.3375863279878263e-07, "logits/chosen": -3.412571907043457, "logits/rejected": -3.04681396484375, "logps/chosen": -188.23965454101562, "logps/rejected": -131.96505737304688, "loss": 0.6603, "rewards/accuracies": 0.625, "rewards/chosen": -0.4602607488632202, "rewards/margins": 0.4447513818740845, "rewards/rejected": -0.9050121307373047, "step": 4865 }, { "epoch": 0.56, "learning_rate": 1.3372351632915836e-07, "logits/chosen": -2.7117178440093994, "logits/rejected": -2.9011940956115723, "logps/chosen": -350.18780517578125, "logps/rejected": -270.3172607421875, "loss": 0.3901, "rewards/accuracies": 0.875, "rewards/chosen": 0.11174158751964569, "rewards/margins": 1.5295052528381348, "rewards/rejected": -1.4177637100219727, "step": 4866 }, { "epoch": 0.56, "learning_rate": 1.336883998595341e-07, "logits/chosen": -3.244351863861084, "logits/rejected": -3.3928301334381104, "logps/chosen": -261.85455322265625, "logps/rejected": -209.16921997070312, "loss": 0.5852, "rewards/accuracies": 0.75, "rewards/chosen": -0.5272128582000732, "rewards/margins": 1.4830909967422485, "rewards/rejected": -2.0103037357330322, "step": 4867 }, { "epoch": 0.56, "learning_rate": 1.3365328338990987e-07, "logits/chosen": -3.489499568939209, "logits/rejected": -3.1806702613830566, "logps/chosen": -228.4987335205078, "logps/rejected": -296.7225341796875, "loss": 0.2504, "rewards/accuracies": 1.0, "rewards/chosen": 0.3213415741920471, "rewards/margins": 3.3911681175231934, "rewards/rejected": -3.06982684135437, "step": 4868 }, { "epoch": 0.56, "learning_rate": 1.3361816692028562e-07, "logits/chosen": -3.2473902702331543, "logits/rejected": -3.317274808883667, "logps/chosen": -179.48220825195312, "logps/rejected": -132.71267700195312, "loss": 0.4831, "rewards/accuracies": 0.625, "rewards/chosen": 0.5110719203948975, "rewards/margins": 1.3085829019546509, "rewards/rejected": -0.7975109815597534, "step": 4869 }, { "epoch": 0.56, "learning_rate": 1.3358305045066135e-07, "logits/chosen": -3.4229869842529297, "logits/rejected": -3.6082475185394287, "logps/chosen": -251.3162841796875, "logps/rejected": -287.1957092285156, "loss": 0.3347, "rewards/accuracies": 0.875, "rewards/chosen": -0.0703224390745163, "rewards/margins": 2.5182015895843506, "rewards/rejected": -2.5885238647460938, "step": 4870 }, { "epoch": 0.56, "learning_rate": 1.335479339810371e-07, "logits/chosen": -3.167391777038574, "logits/rejected": -3.154592275619507, "logps/chosen": -209.53909301757812, "logps/rejected": -238.13009643554688, "loss": 0.4809, "rewards/accuracies": 0.875, "rewards/chosen": -0.27252107858657837, "rewards/margins": 1.8771737813949585, "rewards/rejected": -2.1496949195861816, "step": 4871 }, { "epoch": 0.56, "learning_rate": 1.3351281751141283e-07, "logits/chosen": -2.583113431930542, "logits/rejected": -2.6187350749969482, "logps/chosen": -251.20697021484375, "logps/rejected": -229.94151306152344, "loss": 0.3229, "rewards/accuracies": 1.0, "rewards/chosen": -0.23218902945518494, "rewards/margins": 2.5315258502960205, "rewards/rejected": -2.7637150287628174, "step": 4872 }, { "epoch": 0.56, "learning_rate": 1.334777010417886e-07, "logits/chosen": -3.906602144241333, "logits/rejected": -3.305623769760132, "logps/chosen": -476.9632568359375, "logps/rejected": -165.501220703125, "loss": 0.2043, "rewards/accuracies": 1.0, "rewards/chosen": 0.5393829345703125, "rewards/margins": 2.1506009101867676, "rewards/rejected": -1.611217975616455, "step": 4873 }, { "epoch": 0.56, "learning_rate": 1.3344258457216434e-07, "logits/chosen": -3.215756416320801, "logits/rejected": -3.1138904094696045, "logps/chosen": -234.2472381591797, "logps/rejected": -269.13409423828125, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": 0.41102439165115356, "rewards/margins": 1.8459210395812988, "rewards/rejected": -1.4348968267440796, "step": 4874 }, { "epoch": 0.56, "learning_rate": 1.334074681025401e-07, "logits/chosen": -2.70554518699646, "logits/rejected": -2.6370763778686523, "logps/chosen": -326.5443115234375, "logps/rejected": -226.34170532226562, "loss": 0.474, "rewards/accuracies": 0.75, "rewards/chosen": -0.06508886814117432, "rewards/margins": 0.9070602059364319, "rewards/rejected": -0.972149133682251, "step": 4875 }, { "epoch": 0.56, "learning_rate": 1.3337235163291582e-07, "logits/chosen": -3.4912710189819336, "logits/rejected": -3.4224023818969727, "logps/chosen": -126.04425048828125, "logps/rejected": -239.3314208984375, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": 0.09109699726104736, "rewards/margins": 1.4223251342773438, "rewards/rejected": -1.331228256225586, "step": 4876 }, { "epoch": 0.56, "learning_rate": 1.3333723516329157e-07, "logits/chosen": -3.041226387023926, "logits/rejected": -2.8421080112457275, "logps/chosen": -390.6431884765625, "logps/rejected": -197.08226013183594, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -0.5289754867553711, "rewards/margins": 1.0216331481933594, "rewards/rejected": -1.55060875415802, "step": 4877 }, { "epoch": 0.56, "learning_rate": 1.3330211869366732e-07, "logits/chosen": -3.5621020793914795, "logits/rejected": -3.4112510681152344, "logps/chosen": -216.70046997070312, "logps/rejected": -261.461669921875, "loss": 0.3477, "rewards/accuracies": 1.0, "rewards/chosen": 0.3270961046218872, "rewards/margins": 1.328822135925293, "rewards/rejected": -1.0017261505126953, "step": 4878 }, { "epoch": 0.56, "learning_rate": 1.3326700222404308e-07, "logits/chosen": -3.0315003395080566, "logits/rejected": -2.9616591930389404, "logps/chosen": -333.7755126953125, "logps/rejected": -291.47711181640625, "loss": 0.296, "rewards/accuracies": 1.0, "rewards/chosen": -0.5803419351577759, "rewards/margins": 1.4283270835876465, "rewards/rejected": -2.008668899536133, "step": 4879 }, { "epoch": 0.56, "learning_rate": 1.332318857544188e-07, "logits/chosen": -2.868744373321533, "logits/rejected": -3.033236503601074, "logps/chosen": -259.2884521484375, "logps/rejected": -182.00111389160156, "loss": 0.2573, "rewards/accuracies": 1.0, "rewards/chosen": 0.13484808802604675, "rewards/margins": 1.9310410022735596, "rewards/rejected": -1.7961928844451904, "step": 4880 }, { "epoch": 0.56, "learning_rate": 1.3319676928479456e-07, "logits/chosen": -2.076016664505005, "logits/rejected": -1.9806132316589355, "logps/chosen": -349.8100280761719, "logps/rejected": -349.3825378417969, "loss": 0.4444, "rewards/accuracies": 0.75, "rewards/chosen": -0.12152034044265747, "rewards/margins": 1.422515630722046, "rewards/rejected": -1.5440359115600586, "step": 4881 }, { "epoch": 0.56, "learning_rate": 1.3316165281517031e-07, "logits/chosen": -3.6869988441467285, "logits/rejected": -3.1068663597106934, "logps/chosen": -321.1912841796875, "logps/rejected": -229.72854614257812, "loss": 0.2961, "rewards/accuracies": 0.875, "rewards/chosen": -0.6402081251144409, "rewards/margins": 2.112602949142456, "rewards/rejected": -2.7528111934661865, "step": 4882 }, { "epoch": 0.56, "learning_rate": 1.3312653634554604e-07, "logits/chosen": -3.2297158241271973, "logits/rejected": -3.28519344329834, "logps/chosen": -243.52206420898438, "logps/rejected": -150.48068237304688, "loss": 0.4912, "rewards/accuracies": 0.75, "rewards/chosen": -0.04699200391769409, "rewards/margins": 0.974571943283081, "rewards/rejected": -1.02156400680542, "step": 4883 }, { "epoch": 0.56, "learning_rate": 1.330914198759218e-07, "logits/chosen": -2.942178964614868, "logits/rejected": -3.0404787063598633, "logps/chosen": -293.04449462890625, "logps/rejected": -186.2045135498047, "loss": 0.3281, "rewards/accuracies": 0.875, "rewards/chosen": 0.38310950994491577, "rewards/margins": 1.6764382123947144, "rewards/rejected": -1.2933286428451538, "step": 4884 }, { "epoch": 0.56, "learning_rate": 1.3305630340629755e-07, "logits/chosen": -3.228635549545288, "logits/rejected": -2.8563525676727295, "logps/chosen": -278.8379821777344, "logps/rejected": -279.0132751464844, "loss": 0.4463, "rewards/accuracies": 0.75, "rewards/chosen": -0.25605177879333496, "rewards/margins": 1.6962398290634155, "rewards/rejected": -1.95229172706604, "step": 4885 }, { "epoch": 0.56, "learning_rate": 1.330211869366733e-07, "logits/chosen": -2.89609432220459, "logits/rejected": -2.9884021282196045, "logps/chosen": -222.19039916992188, "logps/rejected": -175.80857849121094, "loss": 0.3163, "rewards/accuracies": 0.875, "rewards/chosen": -0.32426342368125916, "rewards/margins": 1.8459675312042236, "rewards/rejected": -2.1702308654785156, "step": 4886 }, { "epoch": 0.56, "learning_rate": 1.3298607046704903e-07, "logits/chosen": -2.879338264465332, "logits/rejected": -2.895312786102295, "logps/chosen": -195.06227111816406, "logps/rejected": -237.02513122558594, "loss": 0.3715, "rewards/accuracies": 0.875, "rewards/chosen": -0.21890462934970856, "rewards/margins": 2.262279510498047, "rewards/rejected": -2.4811840057373047, "step": 4887 }, { "epoch": 0.56, "learning_rate": 1.3295095399742478e-07, "logits/chosen": -3.116154193878174, "logits/rejected": -2.980287551879883, "logps/chosen": -504.2994384765625, "logps/rejected": -240.18603515625, "loss": 0.6852, "rewards/accuracies": 0.625, "rewards/chosen": -0.1286935806274414, "rewards/margins": 1.1999173164367676, "rewards/rejected": -1.328610897064209, "step": 4888 }, { "epoch": 0.56, "learning_rate": 1.3291583752780054e-07, "logits/chosen": -3.5475594997406006, "logits/rejected": -3.5474624633789062, "logps/chosen": -190.1613006591797, "logps/rejected": -196.43894958496094, "loss": 0.5236, "rewards/accuracies": 0.75, "rewards/chosen": -0.26426127552986145, "rewards/margins": 1.8001375198364258, "rewards/rejected": -2.064398765563965, "step": 4889 }, { "epoch": 0.56, "learning_rate": 1.328807210581763e-07, "logits/chosen": -2.6877055168151855, "logits/rejected": -2.7855048179626465, "logps/chosen": -253.08489990234375, "logps/rejected": -361.6291198730469, "loss": 0.5091, "rewards/accuracies": 0.75, "rewards/chosen": -0.06393276154994965, "rewards/margins": 1.3458161354064941, "rewards/rejected": -1.4097487926483154, "step": 4890 }, { "epoch": 0.56, "learning_rate": 1.3284560458855202e-07, "logits/chosen": -3.1982803344726562, "logits/rejected": -3.3237831592559814, "logps/chosen": -387.70697021484375, "logps/rejected": -313.0211486816406, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -0.3383297026157379, "rewards/margins": 2.886847972869873, "rewards/rejected": -3.2251780033111572, "step": 4891 }, { "epoch": 0.56, "learning_rate": 1.3281048811892777e-07, "logits/chosen": -2.916992664337158, "logits/rejected": -2.759927749633789, "logps/chosen": -251.814697265625, "logps/rejected": -257.35211181640625, "loss": 0.2704, "rewards/accuracies": 1.0, "rewards/chosen": 0.23687852919101715, "rewards/margins": 1.3340376615524292, "rewards/rejected": -1.0971591472625732, "step": 4892 }, { "epoch": 0.56, "learning_rate": 1.3277537164930353e-07, "logits/chosen": -3.240095615386963, "logits/rejected": -2.864713191986084, "logps/chosen": -229.0552978515625, "logps/rejected": -240.14401245117188, "loss": 0.3682, "rewards/accuracies": 0.875, "rewards/chosen": -0.1837874948978424, "rewards/margins": 2.5745575428009033, "rewards/rejected": -2.758344888687134, "step": 4893 }, { "epoch": 0.56, "learning_rate": 1.3274025517967925e-07, "logits/chosen": -2.4051156044006348, "logits/rejected": -2.198991298675537, "logps/chosen": -400.2027282714844, "logps/rejected": -327.56475830078125, "loss": 0.6706, "rewards/accuracies": 0.75, "rewards/chosen": -0.33958742022514343, "rewards/margins": 0.5712059736251831, "rewards/rejected": -0.9107934236526489, "step": 4894 }, { "epoch": 0.56, "learning_rate": 1.32705138710055e-07, "logits/chosen": -3.3251729011535645, "logits/rejected": -3.405212879180908, "logps/chosen": -127.55083465576172, "logps/rejected": -179.42562866210938, "loss": 0.4453, "rewards/accuracies": 0.75, "rewards/chosen": 0.24944451451301575, "rewards/margins": 1.9824185371398926, "rewards/rejected": -1.7329740524291992, "step": 4895 }, { "epoch": 0.56, "learning_rate": 1.3267002224043076e-07, "logits/chosen": -3.0091235637664795, "logits/rejected": -3.131532669067383, "logps/chosen": -338.9698486328125, "logps/rejected": -267.18963623046875, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": 0.39405956864356995, "rewards/margins": 2.4632673263549805, "rewards/rejected": -2.0692076683044434, "step": 4896 }, { "epoch": 0.56, "learning_rate": 1.3263490577080652e-07, "logits/chosen": -3.1258771419525146, "logits/rejected": -2.738579273223877, "logps/chosen": -231.29055786132812, "logps/rejected": -162.5377655029297, "loss": 0.4911, "rewards/accuracies": 0.75, "rewards/chosen": -0.18818935751914978, "rewards/margins": 1.9141067266464233, "rewards/rejected": -2.1022961139678955, "step": 4897 }, { "epoch": 0.56, "learning_rate": 1.3259978930118224e-07, "logits/chosen": -2.9624364376068115, "logits/rejected": -3.0433218479156494, "logps/chosen": -444.4438171386719, "logps/rejected": -636.9229125976562, "loss": 0.7902, "rewards/accuracies": 0.625, "rewards/chosen": 0.40306004881858826, "rewards/margins": 0.9127146601676941, "rewards/rejected": -0.5096545815467834, "step": 4898 }, { "epoch": 0.56, "learning_rate": 1.32564672831558e-07, "logits/chosen": -3.2270336151123047, "logits/rejected": -2.948911666870117, "logps/chosen": -317.98828125, "logps/rejected": -210.378173828125, "loss": 0.4427, "rewards/accuracies": 0.875, "rewards/chosen": -0.19304323196411133, "rewards/margins": 1.8410844802856445, "rewards/rejected": -2.034127712249756, "step": 4899 }, { "epoch": 0.56, "learning_rate": 1.3252955636193372e-07, "logits/chosen": -2.9118876457214355, "logits/rejected": -2.8420119285583496, "logps/chosen": -298.6072998046875, "logps/rejected": -333.1017761230469, "loss": 0.5898, "rewards/accuracies": 0.625, "rewards/chosen": -0.4325065612792969, "rewards/margins": 0.8824589252471924, "rewards/rejected": -1.3149654865264893, "step": 4900 }, { "epoch": 0.56, "learning_rate": 1.324944398923095e-07, "logits/chosen": -2.8226513862609863, "logits/rejected": -2.9257545471191406, "logps/chosen": -185.9884033203125, "logps/rejected": -234.37887573242188, "loss": 0.5428, "rewards/accuracies": 0.5, "rewards/chosen": -0.3633840084075928, "rewards/margins": 1.5272854566574097, "rewards/rejected": -1.890669345855713, "step": 4901 }, { "epoch": 0.57, "learning_rate": 1.3245932342268523e-07, "logits/chosen": -2.2878658771514893, "logits/rejected": -2.267481803894043, "logps/chosen": -181.4043731689453, "logps/rejected": -210.738525390625, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": -0.10961493849754333, "rewards/margins": 1.8995696306228638, "rewards/rejected": -2.0091843605041504, "step": 4902 }, { "epoch": 0.57, "learning_rate": 1.3242420695306099e-07, "logits/chosen": -4.1411285400390625, "logits/rejected": -3.600309371948242, "logps/chosen": -266.3943786621094, "logps/rejected": -231.4290771484375, "loss": 0.272, "rewards/accuracies": 0.875, "rewards/chosen": -0.30869436264038086, "rewards/margins": 1.6257681846618652, "rewards/rejected": -1.934462547302246, "step": 4903 }, { "epoch": 0.57, "learning_rate": 1.3238909048343671e-07, "logits/chosen": -2.721696138381958, "logits/rejected": -2.9153900146484375, "logps/chosen": -336.3002624511719, "logps/rejected": -339.7408447265625, "loss": 0.3325, "rewards/accuracies": 0.75, "rewards/chosen": 0.4062209129333496, "rewards/margins": 2.216686487197876, "rewards/rejected": -1.810465693473816, "step": 4904 }, { "epoch": 0.57, "learning_rate": 1.3235397401381247e-07, "logits/chosen": -3.0409438610076904, "logits/rejected": -3.128696918487549, "logps/chosen": -194.29405212402344, "logps/rejected": -221.87432861328125, "loss": 0.7538, "rewards/accuracies": 0.75, "rewards/chosen": -0.3714163303375244, "rewards/margins": 0.3739473819732666, "rewards/rejected": -0.745363712310791, "step": 4905 }, { "epoch": 0.57, "learning_rate": 1.3231885754418822e-07, "logits/chosen": -3.6239473819732666, "logits/rejected": -3.068600654602051, "logps/chosen": -282.214111328125, "logps/rejected": -188.49365234375, "loss": 0.3005, "rewards/accuracies": 0.875, "rewards/chosen": -0.13825896382331848, "rewards/margins": 2.6470577716827393, "rewards/rejected": -2.7853169441223145, "step": 4906 }, { "epoch": 0.57, "learning_rate": 1.3228374107456397e-07, "logits/chosen": -3.0840489864349365, "logits/rejected": -3.2296829223632812, "logps/chosen": -197.3389129638672, "logps/rejected": -214.23655700683594, "loss": 0.5496, "rewards/accuracies": 0.625, "rewards/chosen": -0.6762574911117554, "rewards/margins": 0.8705728650093079, "rewards/rejected": -1.5468302965164185, "step": 4907 }, { "epoch": 0.57, "learning_rate": 1.322486246049397e-07, "logits/chosen": -2.6512768268585205, "logits/rejected": -2.9251527786254883, "logps/chosen": -353.0219421386719, "logps/rejected": -388.5227355957031, "loss": 0.5655, "rewards/accuracies": 0.5, "rewards/chosen": -0.24956826865673065, "rewards/margins": 2.1941239833831787, "rewards/rejected": -2.443692207336426, "step": 4908 }, { "epoch": 0.57, "learning_rate": 1.3221350813531546e-07, "logits/chosen": -2.487590789794922, "logits/rejected": -2.6427364349365234, "logps/chosen": -373.6787414550781, "logps/rejected": -385.35650634765625, "loss": 0.2058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3407079577445984, "rewards/margins": 2.158514976501465, "rewards/rejected": -1.8178067207336426, "step": 4909 }, { "epoch": 0.57, "learning_rate": 1.321783916656912e-07, "logits/chosen": -3.645325183868408, "logits/rejected": -3.5673344135284424, "logps/chosen": -293.7283935546875, "logps/rejected": -264.0010681152344, "loss": 0.3221, "rewards/accuracies": 0.875, "rewards/chosen": 0.7474640607833862, "rewards/margins": 1.7244240045547485, "rewards/rejected": -0.9769598245620728, "step": 4910 }, { "epoch": 0.57, "learning_rate": 1.3214327519606694e-07, "logits/chosen": -3.6679060459136963, "logits/rejected": -3.796190023422241, "logps/chosen": -181.4422149658203, "logps/rejected": -291.94854736328125, "loss": 0.4804, "rewards/accuracies": 0.75, "rewards/chosen": -0.10799375176429749, "rewards/margins": 0.9891284108161926, "rewards/rejected": -1.0971221923828125, "step": 4911 }, { "epoch": 0.57, "learning_rate": 1.321081587264427e-07, "logits/chosen": -2.6542954444885254, "logits/rejected": -2.646676778793335, "logps/chosen": -175.72618103027344, "logps/rejected": -210.57452392578125, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": 0.4149402976036072, "rewards/margins": 2.5189316272735596, "rewards/rejected": -2.1039915084838867, "step": 4912 }, { "epoch": 0.57, "learning_rate": 1.3207304225681845e-07, "logits/chosen": -2.654193639755249, "logits/rejected": -2.824047565460205, "logps/chosen": -209.71621704101562, "logps/rejected": -294.3822937011719, "loss": 0.2103, "rewards/accuracies": 1.0, "rewards/chosen": 0.2966623902320862, "rewards/margins": 2.2196731567382812, "rewards/rejected": -1.9230107069015503, "step": 4913 }, { "epoch": 0.57, "learning_rate": 1.320379257871942e-07, "logits/chosen": -2.647714376449585, "logits/rejected": -2.917083501815796, "logps/chosen": -255.4839324951172, "logps/rejected": -318.06103515625, "loss": 0.3671, "rewards/accuracies": 0.875, "rewards/chosen": 0.12622520327568054, "rewards/margins": 1.362691879272461, "rewards/rejected": -1.2364667654037476, "step": 4914 }, { "epoch": 0.57, "learning_rate": 1.3200280931756993e-07, "logits/chosen": -3.5380845069885254, "logits/rejected": -3.477008819580078, "logps/chosen": -199.2886962890625, "logps/rejected": -124.77758026123047, "loss": 0.8601, "rewards/accuracies": 0.5, "rewards/chosen": -0.7538517117500305, "rewards/margins": 0.053162574768066406, "rewards/rejected": -0.8070142865180969, "step": 4915 }, { "epoch": 0.57, "learning_rate": 1.3196769284794568e-07, "logits/chosen": -2.753117561340332, "logits/rejected": -2.453453540802002, "logps/chosen": -259.119384765625, "logps/rejected": -501.6586608886719, "loss": 0.2173, "rewards/accuracies": 0.875, "rewards/chosen": -0.5415043830871582, "rewards/margins": 2.6375315189361572, "rewards/rejected": -3.1790361404418945, "step": 4916 }, { "epoch": 0.57, "learning_rate": 1.319325763783214e-07, "logits/chosen": -3.4097723960876465, "logits/rejected": -3.266212224960327, "logps/chosen": -232.3413543701172, "logps/rejected": -204.48028564453125, "loss": 0.5325, "rewards/accuracies": 0.625, "rewards/chosen": -0.5029879808425903, "rewards/margins": 0.472893089056015, "rewards/rejected": -0.9758810997009277, "step": 4917 }, { "epoch": 0.57, "learning_rate": 1.318974599086972e-07, "logits/chosen": -2.788933277130127, "logits/rejected": -2.9494869709014893, "logps/chosen": -363.47259521484375, "logps/rejected": -179.11434936523438, "loss": 0.4688, "rewards/accuracies": 0.875, "rewards/chosen": 0.006411215290427208, "rewards/margins": 1.4677619934082031, "rewards/rejected": -1.461350679397583, "step": 4918 }, { "epoch": 0.57, "learning_rate": 1.3186234343907292e-07, "logits/chosen": -3.520358085632324, "logits/rejected": -3.615854024887085, "logps/chosen": -237.20941162109375, "logps/rejected": -310.415771484375, "loss": 0.3146, "rewards/accuracies": 0.875, "rewards/chosen": -0.37379762530326843, "rewards/margins": 1.78755521774292, "rewards/rejected": -2.1613526344299316, "step": 4919 }, { "epoch": 0.57, "learning_rate": 1.3182722696944867e-07, "logits/chosen": -3.468078374862671, "logits/rejected": -4.001546859741211, "logps/chosen": -89.44667053222656, "logps/rejected": -243.12420654296875, "loss": 0.1656, "rewards/accuracies": 1.0, "rewards/chosen": 0.13077807426452637, "rewards/margins": 2.8602848052978516, "rewards/rejected": -2.729506731033325, "step": 4920 }, { "epoch": 0.57, "learning_rate": 1.317921104998244e-07, "logits/chosen": -2.8403260707855225, "logits/rejected": -3.0432260036468506, "logps/chosen": -198.11672973632812, "logps/rejected": -249.2513427734375, "loss": 0.1349, "rewards/accuracies": 1.0, "rewards/chosen": 0.20555776357650757, "rewards/margins": 2.364628314971924, "rewards/rejected": -2.1590704917907715, "step": 4921 }, { "epoch": 0.57, "learning_rate": 1.3175699403020018e-07, "logits/chosen": -3.4067773818969727, "logits/rejected": -3.585719585418701, "logps/chosen": -157.963623046875, "logps/rejected": -172.57028198242188, "loss": 0.9706, "rewards/accuracies": 0.375, "rewards/chosen": -0.5504069924354553, "rewards/margins": 0.7901512384414673, "rewards/rejected": -1.3405581712722778, "step": 4922 }, { "epoch": 0.57, "learning_rate": 1.317218775605759e-07, "logits/chosen": -2.902548313140869, "logits/rejected": -2.780874252319336, "logps/chosen": -366.435546875, "logps/rejected": -368.0665588378906, "loss": 0.3679, "rewards/accuracies": 0.75, "rewards/chosen": 0.06786558032035828, "rewards/margins": 1.2423560619354248, "rewards/rejected": -1.1744904518127441, "step": 4923 }, { "epoch": 0.57, "learning_rate": 1.3168676109095166e-07, "logits/chosen": -2.9311952590942383, "logits/rejected": -2.7318408489227295, "logps/chosen": -263.76934814453125, "logps/rejected": -245.297119140625, "loss": 0.4397, "rewards/accuracies": 0.625, "rewards/chosen": -0.5990593433380127, "rewards/margins": 1.364341378211975, "rewards/rejected": -1.9634007215499878, "step": 4924 }, { "epoch": 0.57, "learning_rate": 1.3165164462132739e-07, "logits/chosen": -2.691896915435791, "logits/rejected": -2.9158291816711426, "logps/chosen": -314.6687927246094, "logps/rejected": -231.56893920898438, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": -0.20459231734275818, "rewards/margins": 1.2141802310943604, "rewards/rejected": -1.418772578239441, "step": 4925 }, { "epoch": 0.57, "learning_rate": 1.3161652815170314e-07, "logits/chosen": -2.8826990127563477, "logits/rejected": -3.0244998931884766, "logps/chosen": -165.87738037109375, "logps/rejected": -197.8976287841797, "loss": 0.4684, "rewards/accuracies": 0.625, "rewards/chosen": -0.2988194227218628, "rewards/margins": 1.136254906654358, "rewards/rejected": -1.4350742101669312, "step": 4926 }, { "epoch": 0.57, "learning_rate": 1.315814116820789e-07, "logits/chosen": -3.3401851654052734, "logits/rejected": -3.4247074127197266, "logps/chosen": -184.37310791015625, "logps/rejected": -185.7845458984375, "loss": 0.4898, "rewards/accuracies": 0.75, "rewards/chosen": -0.3878783583641052, "rewards/margins": 1.044654130935669, "rewards/rejected": -1.432532548904419, "step": 4927 }, { "epoch": 0.57, "learning_rate": 1.3154629521245462e-07, "logits/chosen": -3.3403563499450684, "logits/rejected": -3.1390328407287598, "logps/chosen": -189.7566375732422, "logps/rejected": -295.0284118652344, "loss": 0.1959, "rewards/accuracies": 0.875, "rewards/chosen": 0.11122691631317139, "rewards/margins": 2.3836684226989746, "rewards/rejected": -2.2724413871765137, "step": 4928 }, { "epoch": 0.57, "learning_rate": 1.3151117874283037e-07, "logits/chosen": -3.452359676361084, "logits/rejected": -3.2125983238220215, "logps/chosen": -598.569580078125, "logps/rejected": -291.1309814453125, "loss": 0.3292, "rewards/accuracies": 1.0, "rewards/chosen": -0.1856641173362732, "rewards/margins": 1.721681833267212, "rewards/rejected": -1.9073460102081299, "step": 4929 }, { "epoch": 0.57, "learning_rate": 1.3147606227320613e-07, "logits/chosen": -3.3267569541931152, "logits/rejected": -3.52630877494812, "logps/chosen": -193.173828125, "logps/rejected": -273.3988037109375, "loss": 0.4766, "rewards/accuracies": 0.875, "rewards/chosen": 0.07897251844406128, "rewards/margins": 1.0750197172164917, "rewards/rejected": -0.9960471987724304, "step": 4930 }, { "epoch": 0.57, "learning_rate": 1.3144094580358188e-07, "logits/chosen": -2.708320140838623, "logits/rejected": -2.6287567615509033, "logps/chosen": -74.66765594482422, "logps/rejected": -198.45526123046875, "loss": 0.8741, "rewards/accuracies": 0.5, "rewards/chosen": -0.9132437705993652, "rewards/margins": 0.2134455442428589, "rewards/rejected": -1.1266894340515137, "step": 4931 }, { "epoch": 0.57, "learning_rate": 1.314058293339576e-07, "logits/chosen": -2.5971577167510986, "logits/rejected": -2.555586099624634, "logps/chosen": -247.43585205078125, "logps/rejected": -268.5702819824219, "loss": 0.5352, "rewards/accuracies": 0.625, "rewards/chosen": -0.03060247004032135, "rewards/margins": 1.708949089050293, "rewards/rejected": -1.7395515441894531, "step": 4932 }, { "epoch": 0.57, "learning_rate": 1.3137071286433336e-07, "logits/chosen": -3.214047431945801, "logits/rejected": -3.0868911743164062, "logps/chosen": -343.50177001953125, "logps/rejected": -256.94512939453125, "loss": 0.4016, "rewards/accuracies": 0.75, "rewards/chosen": 0.2862287759780884, "rewards/margins": 2.061235189437866, "rewards/rejected": -1.7750064134597778, "step": 4933 }, { "epoch": 0.57, "learning_rate": 1.3133559639470912e-07, "logits/chosen": -2.4888644218444824, "logits/rejected": -2.636190414428711, "logps/chosen": -401.0740966796875, "logps/rejected": -216.77523803710938, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -0.06943672895431519, "rewards/margins": 1.4723007678985596, "rewards/rejected": -1.5417375564575195, "step": 4934 }, { "epoch": 0.57, "learning_rate": 1.3130047992508487e-07, "logits/chosen": -2.8792853355407715, "logits/rejected": -2.895745277404785, "logps/chosen": -213.0355224609375, "logps/rejected": -358.20220947265625, "loss": 0.1693, "rewards/accuracies": 1.0, "rewards/chosen": -0.22030703723430634, "rewards/margins": 2.918618679046631, "rewards/rejected": -3.138925552368164, "step": 4935 }, { "epoch": 0.57, "learning_rate": 1.312653634554606e-07, "logits/chosen": -3.2108352184295654, "logits/rejected": -3.3150007724761963, "logps/chosen": -324.4971618652344, "logps/rejected": -328.8071594238281, "loss": 0.2518, "rewards/accuracies": 1.0, "rewards/chosen": 0.5123298764228821, "rewards/margins": 2.5654871463775635, "rewards/rejected": -2.053157091140747, "step": 4936 }, { "epoch": 0.57, "learning_rate": 1.3123024698583635e-07, "logits/chosen": -2.8900461196899414, "logits/rejected": -2.489104747772217, "logps/chosen": -151.1640625, "logps/rejected": -158.85409545898438, "loss": 0.3136, "rewards/accuracies": 0.875, "rewards/chosen": 0.08491766452789307, "rewards/margins": 1.5959644317626953, "rewards/rejected": -1.5110468864440918, "step": 4937 }, { "epoch": 0.57, "learning_rate": 1.311951305162121e-07, "logits/chosen": -3.1130785942077637, "logits/rejected": -3.0720691680908203, "logps/chosen": -210.61068725585938, "logps/rejected": -357.0616455078125, "loss": 0.273, "rewards/accuracies": 1.0, "rewards/chosen": 0.13546809554100037, "rewards/margins": 1.3826814889907837, "rewards/rejected": -1.247213363647461, "step": 4938 }, { "epoch": 0.57, "learning_rate": 1.3116001404658786e-07, "logits/chosen": -3.9551098346710205, "logits/rejected": -3.6575276851654053, "logps/chosen": -238.61065673828125, "logps/rejected": -218.1276397705078, "loss": 0.7106, "rewards/accuracies": 0.75, "rewards/chosen": -0.2874159812927246, "rewards/margins": 1.1699365377426147, "rewards/rejected": -1.4573523998260498, "step": 4939 }, { "epoch": 0.57, "learning_rate": 1.311248975769636e-07, "logits/chosen": -3.440756320953369, "logits/rejected": -3.40440034866333, "logps/chosen": -394.1746826171875, "logps/rejected": -441.11181640625, "loss": 0.3234, "rewards/accuracies": 1.0, "rewards/chosen": 0.27452021837234497, "rewards/margins": 2.6201705932617188, "rewards/rejected": -2.3456499576568604, "step": 4940 }, { "epoch": 0.57, "learning_rate": 1.3108978110733934e-07, "logits/chosen": -2.6250715255737305, "logits/rejected": -2.7495172023773193, "logps/chosen": -222.3017578125, "logps/rejected": -269.2227478027344, "loss": 0.7082, "rewards/accuracies": 0.625, "rewards/chosen": -0.3660350441932678, "rewards/margins": 1.5599664449691772, "rewards/rejected": -1.9260013103485107, "step": 4941 }, { "epoch": 0.57, "learning_rate": 1.310546646377151e-07, "logits/chosen": -3.0981194972991943, "logits/rejected": -2.930821657180786, "logps/chosen": -372.7041015625, "logps/rejected": -225.827880859375, "loss": 0.444, "rewards/accuracies": 0.75, "rewards/chosen": -0.21049928665161133, "rewards/margins": 0.8749426603317261, "rewards/rejected": -1.0854419469833374, "step": 4942 }, { "epoch": 0.57, "learning_rate": 1.3101954816809082e-07, "logits/chosen": -2.919790029525757, "logits/rejected": -2.80733060836792, "logps/chosen": -226.75860595703125, "logps/rejected": -322.7506103515625, "loss": 0.5625, "rewards/accuracies": 0.75, "rewards/chosen": -0.23190197348594666, "rewards/margins": 0.9645235538482666, "rewards/rejected": -1.1964255571365356, "step": 4943 }, { "epoch": 0.57, "learning_rate": 1.3098443169846658e-07, "logits/chosen": -3.527676582336426, "logits/rejected": -3.2604856491088867, "logps/chosen": -309.6979064941406, "logps/rejected": -482.35015869140625, "loss": 0.4363, "rewards/accuracies": 0.875, "rewards/chosen": -0.7147489786148071, "rewards/margins": 2.247283697128296, "rewards/rejected": -2.9620323181152344, "step": 4944 }, { "epoch": 0.57, "learning_rate": 1.309493152288423e-07, "logits/chosen": -2.459625720977783, "logits/rejected": -2.464909076690674, "logps/chosen": -360.2669677734375, "logps/rejected": -304.644287109375, "loss": 0.4034, "rewards/accuracies": 0.625, "rewards/chosen": 0.06674538552761078, "rewards/margins": 1.4802676439285278, "rewards/rejected": -1.4135222434997559, "step": 4945 }, { "epoch": 0.57, "learning_rate": 1.3091419875921808e-07, "logits/chosen": -3.047049045562744, "logits/rejected": -2.74877667427063, "logps/chosen": -208.1866912841797, "logps/rejected": -197.7428741455078, "loss": 0.4204, "rewards/accuracies": 0.875, "rewards/chosen": -0.0576503723859787, "rewards/margins": 0.9361428618431091, "rewards/rejected": -0.993793249130249, "step": 4946 }, { "epoch": 0.57, "learning_rate": 1.308790822895938e-07, "logits/chosen": -2.5349559783935547, "logits/rejected": -2.4753901958465576, "logps/chosen": -324.45916748046875, "logps/rejected": -234.22232055664062, "loss": 0.2112, "rewards/accuracies": 1.0, "rewards/chosen": 0.04843682050704956, "rewards/margins": 2.6658525466918945, "rewards/rejected": -2.617415428161621, "step": 4947 }, { "epoch": 0.57, "learning_rate": 1.3084396581996957e-07, "logits/chosen": -3.1294267177581787, "logits/rejected": -2.6886649131774902, "logps/chosen": -226.78961181640625, "logps/rejected": -145.67022705078125, "loss": 0.4323, "rewards/accuracies": 0.75, "rewards/chosen": -0.29552125930786133, "rewards/margins": 1.0085690021514893, "rewards/rejected": -1.3040904998779297, "step": 4948 }, { "epoch": 0.57, "learning_rate": 1.308088493503453e-07, "logits/chosen": -2.639169216156006, "logits/rejected": -2.593147039413452, "logps/chosen": -317.732177734375, "logps/rejected": -387.45440673828125, "loss": 0.49, "rewards/accuracies": 0.75, "rewards/chosen": -0.37562912702560425, "rewards/margins": 0.843109667301178, "rewards/rejected": -1.2187387943267822, "step": 4949 }, { "epoch": 0.57, "learning_rate": 1.3077373288072107e-07, "logits/chosen": -2.7017710208892822, "logits/rejected": -2.9531190395355225, "logps/chosen": -428.14129638671875, "logps/rejected": -471.3913879394531, "loss": 0.3615, "rewards/accuracies": 0.875, "rewards/chosen": -0.008618343621492386, "rewards/margins": 2.5370397567749023, "rewards/rejected": -2.5456581115722656, "step": 4950 }, { "epoch": 0.57, "learning_rate": 1.307386164110968e-07, "logits/chosen": -3.5788164138793945, "logits/rejected": -3.5686447620391846, "logps/chosen": -285.7480773925781, "logps/rejected": -237.45346069335938, "loss": 0.1858, "rewards/accuracies": 0.875, "rewards/chosen": 0.24418598413467407, "rewards/margins": 2.5306057929992676, "rewards/rejected": -2.28641939163208, "step": 4951 }, { "epoch": 0.57, "learning_rate": 1.3070349994147255e-07, "logits/chosen": -2.940814971923828, "logits/rejected": -3.0005006790161133, "logps/chosen": -211.50926208496094, "logps/rejected": -215.00997924804688, "loss": 0.4796, "rewards/accuracies": 0.875, "rewards/chosen": -0.22036145627498627, "rewards/margins": 1.2387142181396484, "rewards/rejected": -1.459075927734375, "step": 4952 }, { "epoch": 0.57, "learning_rate": 1.3066838347184828e-07, "logits/chosen": -3.748441696166992, "logits/rejected": -3.5982909202575684, "logps/chosen": -218.64715576171875, "logps/rejected": -282.0867004394531, "loss": 0.2257, "rewards/accuracies": 1.0, "rewards/chosen": -0.28843116760253906, "rewards/margins": 2.623072385787964, "rewards/rejected": -2.911503553390503, "step": 4953 }, { "epoch": 0.57, "learning_rate": 1.3063326700222404e-07, "logits/chosen": -3.421612024307251, "logits/rejected": -3.4151322841644287, "logps/chosen": -333.5555725097656, "logps/rejected": -382.7013854980469, "loss": 0.3102, "rewards/accuracies": 0.875, "rewards/chosen": -0.5651994943618774, "rewards/margins": 2.2862436771392822, "rewards/rejected": -2.851442813873291, "step": 4954 }, { "epoch": 0.57, "learning_rate": 1.305981505325998e-07, "logits/chosen": -2.5706520080566406, "logits/rejected": -2.4061429500579834, "logps/chosen": -347.1488037109375, "logps/rejected": -270.0299377441406, "loss": 0.5195, "rewards/accuracies": 0.75, "rewards/chosen": 0.35082316398620605, "rewards/margins": 1.316308856010437, "rewards/rejected": -0.9654858112335205, "step": 4955 }, { "epoch": 0.57, "learning_rate": 1.3056303406297554e-07, "logits/chosen": -2.380948543548584, "logits/rejected": -2.5193533897399902, "logps/chosen": -225.32102966308594, "logps/rejected": -214.96109008789062, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -0.2842986285686493, "rewards/margins": 0.9809271097183228, "rewards/rejected": -1.2652257680892944, "step": 4956 }, { "epoch": 0.57, "learning_rate": 1.3052791759335127e-07, "logits/chosen": -3.865328311920166, "logits/rejected": -3.5313165187835693, "logps/chosen": -192.69833374023438, "logps/rejected": -221.15577697753906, "loss": 0.3168, "rewards/accuracies": 0.875, "rewards/chosen": 0.16514281928539276, "rewards/margins": 1.2249484062194824, "rewards/rejected": -1.0598055124282837, "step": 4957 }, { "epoch": 0.57, "learning_rate": 1.3049280112372702e-07, "logits/chosen": -2.8983750343322754, "logits/rejected": -2.51039457321167, "logps/chosen": -261.3135681152344, "logps/rejected": -144.97738647460938, "loss": 0.4696, "rewards/accuracies": 0.75, "rewards/chosen": -0.6397002935409546, "rewards/margins": 0.7635846138000488, "rewards/rejected": -1.403285026550293, "step": 4958 }, { "epoch": 0.57, "learning_rate": 1.3045768465410278e-07, "logits/chosen": -3.029940128326416, "logits/rejected": -3.16182804107666, "logps/chosen": -217.603759765625, "logps/rejected": -181.3094940185547, "loss": 0.5391, "rewards/accuracies": 0.625, "rewards/chosen": -0.4878467917442322, "rewards/margins": 1.7707659006118774, "rewards/rejected": -2.258612632751465, "step": 4959 }, { "epoch": 0.57, "learning_rate": 1.304225681844785e-07, "logits/chosen": -2.7982935905456543, "logits/rejected": -2.7803802490234375, "logps/chosen": -268.7843933105469, "logps/rejected": -252.13873291015625, "loss": 0.4478, "rewards/accuracies": 0.875, "rewards/chosen": -0.10395797342061996, "rewards/margins": 1.4850444793701172, "rewards/rejected": -1.5890026092529297, "step": 4960 }, { "epoch": 0.57, "learning_rate": 1.3038745171485426e-07, "logits/chosen": -3.3695876598358154, "logits/rejected": -3.5060722827911377, "logps/chosen": -251.0154266357422, "logps/rejected": -333.7469787597656, "loss": 0.3169, "rewards/accuracies": 0.875, "rewards/chosen": 0.26864930987358093, "rewards/margins": 2.23482084274292, "rewards/rejected": -1.966171383857727, "step": 4961 }, { "epoch": 0.57, "learning_rate": 1.3035233524523e-07, "logits/chosen": -3.18544864654541, "logits/rejected": -3.3944997787475586, "logps/chosen": -295.8947448730469, "logps/rejected": -253.98648071289062, "loss": 0.22, "rewards/accuracies": 1.0, "rewards/chosen": 0.6138091683387756, "rewards/margins": 2.1381993293762207, "rewards/rejected": -1.5243899822235107, "step": 4962 }, { "epoch": 0.57, "learning_rate": 1.3031721877560577e-07, "logits/chosen": -3.6116018295288086, "logits/rejected": -3.7100014686584473, "logps/chosen": -285.93670654296875, "logps/rejected": -347.6808166503906, "loss": 0.1586, "rewards/accuracies": 0.875, "rewards/chosen": -0.48476165533065796, "rewards/margins": 3.9011690616607666, "rewards/rejected": -4.38593053817749, "step": 4963 }, { "epoch": 0.57, "learning_rate": 1.302821023059815e-07, "logits/chosen": -3.055772542953491, "logits/rejected": -3.0889971256256104, "logps/chosen": -169.44876098632812, "logps/rejected": -222.90133666992188, "loss": 0.3566, "rewards/accuracies": 1.0, "rewards/chosen": 0.026242226362228394, "rewards/margins": 1.058784008026123, "rewards/rejected": -1.0325417518615723, "step": 4964 }, { "epoch": 0.57, "learning_rate": 1.3024698583635725e-07, "logits/chosen": -2.95626163482666, "logits/rejected": -2.931473970413208, "logps/chosen": -261.1575927734375, "logps/rejected": -288.1734924316406, "loss": 0.4206, "rewards/accuracies": 0.875, "rewards/chosen": -0.3226381540298462, "rewards/margins": 1.2701525688171387, "rewards/rejected": -1.5927908420562744, "step": 4965 }, { "epoch": 0.57, "learning_rate": 1.3021186936673298e-07, "logits/chosen": -2.9799439907073975, "logits/rejected": -3.1734542846679688, "logps/chosen": -144.12384033203125, "logps/rejected": -167.71217346191406, "loss": 0.5769, "rewards/accuracies": 0.75, "rewards/chosen": -0.014158889651298523, "rewards/margins": 0.7376977205276489, "rewards/rejected": -0.7518566250801086, "step": 4966 }, { "epoch": 0.57, "learning_rate": 1.3017675289710876e-07, "logits/chosen": -2.761080503463745, "logits/rejected": -2.71244478225708, "logps/chosen": -406.71026611328125, "logps/rejected": -285.1802978515625, "loss": 0.3038, "rewards/accuracies": 1.0, "rewards/chosen": -0.5535622835159302, "rewards/margins": 1.3473411798477173, "rewards/rejected": -1.9009034633636475, "step": 4967 }, { "epoch": 0.57, "learning_rate": 1.3014163642748448e-07, "logits/chosen": -2.9888193607330322, "logits/rejected": -2.759368896484375, "logps/chosen": -175.24514770507812, "logps/rejected": -214.9909210205078, "loss": 0.7225, "rewards/accuracies": 0.625, "rewards/chosen": -0.36271438002586365, "rewards/margins": 0.15871021151542664, "rewards/rejected": -0.5214246511459351, "step": 4968 }, { "epoch": 0.57, "learning_rate": 1.3010651995786024e-07, "logits/chosen": -3.4790406227111816, "logits/rejected": -3.534580707550049, "logps/chosen": -289.99566650390625, "logps/rejected": -168.06858825683594, "loss": 0.3559, "rewards/accuracies": 1.0, "rewards/chosen": 0.12954498827457428, "rewards/margins": 1.1969680786132812, "rewards/rejected": -1.0674229860305786, "step": 4969 }, { "epoch": 0.57, "learning_rate": 1.3007140348823596e-07, "logits/chosen": -3.0978968143463135, "logits/rejected": -2.810429096221924, "logps/chosen": -246.46380615234375, "logps/rejected": -372.7960205078125, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": 0.24708181619644165, "rewards/margins": 2.0090901851654053, "rewards/rejected": -1.7620083093643188, "step": 4970 }, { "epoch": 0.57, "learning_rate": 1.3003628701861172e-07, "logits/chosen": -2.1795616149902344, "logits/rejected": -2.157310962677002, "logps/chosen": -230.5937042236328, "logps/rejected": -272.43585205078125, "loss": 0.1992, "rewards/accuracies": 1.0, "rewards/chosen": 0.5251719355583191, "rewards/margins": 1.9233736991882324, "rewards/rejected": -1.398201823234558, "step": 4971 }, { "epoch": 0.57, "learning_rate": 1.3000117054898747e-07, "logits/chosen": -3.417649984359741, "logits/rejected": -3.2250962257385254, "logps/chosen": -322.67083740234375, "logps/rejected": -269.47613525390625, "loss": 1.009, "rewards/accuracies": 0.5, "rewards/chosen": -1.32966947555542, "rewards/margins": -0.13085809350013733, "rewards/rejected": -1.1988112926483154, "step": 4972 }, { "epoch": 0.57, "learning_rate": 1.2996605407936323e-07, "logits/chosen": -3.0274643898010254, "logits/rejected": -3.1419639587402344, "logps/chosen": -265.71148681640625, "logps/rejected": -303.3494873046875, "loss": 0.4276, "rewards/accuracies": 0.75, "rewards/chosen": -0.008280612528324127, "rewards/margins": 1.550323486328125, "rewards/rejected": -1.5586040019989014, "step": 4973 }, { "epoch": 0.57, "learning_rate": 1.2993093760973895e-07, "logits/chosen": -2.9352335929870605, "logits/rejected": -2.881777048110962, "logps/chosen": -431.638671875, "logps/rejected": -291.3288879394531, "loss": 0.2504, "rewards/accuracies": 1.0, "rewards/chosen": 0.10204711556434631, "rewards/margins": 2.2424111366271973, "rewards/rejected": -2.140364170074463, "step": 4974 }, { "epoch": 0.57, "learning_rate": 1.298958211401147e-07, "logits/chosen": -2.822690486907959, "logits/rejected": -3.1079559326171875, "logps/chosen": -256.0130310058594, "logps/rejected": -332.115234375, "loss": 0.2997, "rewards/accuracies": 0.875, "rewards/chosen": 0.06218039244413376, "rewards/margins": 2.771812915802002, "rewards/rejected": -2.709632396697998, "step": 4975 }, { "epoch": 0.57, "learning_rate": 1.2986070467049046e-07, "logits/chosen": -2.963986873626709, "logits/rejected": -2.7930469512939453, "logps/chosen": -276.9523620605469, "logps/rejected": -347.6455383300781, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": 0.5524616837501526, "rewards/margins": 4.511364936828613, "rewards/rejected": -3.9589028358459473, "step": 4976 }, { "epoch": 0.57, "learning_rate": 1.298255882008662e-07, "logits/chosen": -3.1929097175598145, "logits/rejected": -3.2397401332855225, "logps/chosen": -219.9685821533203, "logps/rejected": -266.9318542480469, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": 0.06854460388422012, "rewards/margins": 2.593303680419922, "rewards/rejected": -2.524759292602539, "step": 4977 }, { "epoch": 0.57, "learning_rate": 1.2979047173124194e-07, "logits/chosen": -2.3190293312072754, "logits/rejected": -2.4377365112304688, "logps/chosen": -410.7904052734375, "logps/rejected": -282.447509765625, "loss": 0.5251, "rewards/accuracies": 0.75, "rewards/chosen": 0.20272788405418396, "rewards/margins": 1.3600897789001465, "rewards/rejected": -1.1573619842529297, "step": 4978 }, { "epoch": 0.57, "learning_rate": 1.297553552616177e-07, "logits/chosen": -3.8452324867248535, "logits/rejected": -3.7175536155700684, "logps/chosen": -178.24652099609375, "logps/rejected": -185.779052734375, "loss": 0.2639, "rewards/accuracies": 0.875, "rewards/chosen": 0.17466901242733002, "rewards/margins": 2.2194957733154297, "rewards/rejected": -2.0448265075683594, "step": 4979 }, { "epoch": 0.57, "learning_rate": 1.2972023879199345e-07, "logits/chosen": -2.8956191539764404, "logits/rejected": -2.9622480869293213, "logps/chosen": -226.55337524414062, "logps/rejected": -124.57035064697266, "loss": 0.3953, "rewards/accuracies": 0.875, "rewards/chosen": -0.62633216381073, "rewards/margins": 1.3090336322784424, "rewards/rejected": -1.9353657960891724, "step": 4980 }, { "epoch": 0.57, "learning_rate": 1.2968512232236918e-07, "logits/chosen": -3.0936715602874756, "logits/rejected": -3.286043882369995, "logps/chosen": -102.5303726196289, "logps/rejected": -99.36160278320312, "loss": 0.6965, "rewards/accuracies": 0.625, "rewards/chosen": -1.0346473455429077, "rewards/margins": 0.2572775185108185, "rewards/rejected": -1.2919249534606934, "step": 4981 }, { "epoch": 0.57, "learning_rate": 1.2965000585274493e-07, "logits/chosen": -2.982877731323242, "logits/rejected": -3.151062250137329, "logps/chosen": -271.1722717285156, "logps/rejected": -270.23291015625, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": 0.37522900104522705, "rewards/margins": 2.494188070297241, "rewards/rejected": -2.1189589500427246, "step": 4982 }, { "epoch": 0.57, "learning_rate": 1.2961488938312069e-07, "logits/chosen": -3.476167678833008, "logits/rejected": -3.4448540210723877, "logps/chosen": -363.12005615234375, "logps/rejected": -244.47769165039062, "loss": 0.7593, "rewards/accuracies": 0.75, "rewards/chosen": -0.11011549830436707, "rewards/margins": 1.4350695610046387, "rewards/rejected": -1.5451852083206177, "step": 4983 }, { "epoch": 0.57, "learning_rate": 1.2957977291349644e-07, "logits/chosen": -3.26519775390625, "logits/rejected": -3.210214138031006, "logps/chosen": -251.0633544921875, "logps/rejected": -253.66310119628906, "loss": 0.1766, "rewards/accuracies": 0.875, "rewards/chosen": 0.03160540759563446, "rewards/margins": 3.868051290512085, "rewards/rejected": -3.8364460468292236, "step": 4984 }, { "epoch": 0.57, "learning_rate": 1.2954465644387217e-07, "logits/chosen": -2.7529828548431396, "logits/rejected": -2.686272144317627, "logps/chosen": -300.24365234375, "logps/rejected": -265.9488220214844, "loss": 0.3701, "rewards/accuracies": 0.75, "rewards/chosen": -0.2547810673713684, "rewards/margins": 1.9681310653686523, "rewards/rejected": -2.222912073135376, "step": 4985 }, { "epoch": 0.57, "learning_rate": 1.2950953997424792e-07, "logits/chosen": -2.576279640197754, "logits/rejected": -2.701568603515625, "logps/chosen": -435.45977783203125, "logps/rejected": -294.992431640625, "loss": 0.4067, "rewards/accuracies": 0.75, "rewards/chosen": -0.34801918268203735, "rewards/margins": 1.2943801879882812, "rewards/rejected": -1.6423994302749634, "step": 4986 }, { "epoch": 0.57, "learning_rate": 1.2947442350462367e-07, "logits/chosen": -3.6411819458007812, "logits/rejected": -3.500779151916504, "logps/chosen": -286.2313537597656, "logps/rejected": -190.35989379882812, "loss": 0.4886, "rewards/accuracies": 0.625, "rewards/chosen": -0.024443045258522034, "rewards/margins": 1.3339817523956299, "rewards/rejected": -1.3584247827529907, "step": 4987 }, { "epoch": 0.58, "learning_rate": 1.294393070349994e-07, "logits/chosen": -3.300842523574829, "logits/rejected": -3.0408682823181152, "logps/chosen": -224.90708923339844, "logps/rejected": -244.97755432128906, "loss": 0.4946, "rewards/accuracies": 0.75, "rewards/chosen": 0.010892353951931, "rewards/margins": 2.1356139183044434, "rewards/rejected": -2.1247217655181885, "step": 4988 }, { "epoch": 0.58, "learning_rate": 1.2940419056537516e-07, "logits/chosen": -3.3927745819091797, "logits/rejected": -3.22948956489563, "logps/chosen": -203.30419921875, "logps/rejected": -247.09532165527344, "loss": 0.296, "rewards/accuracies": 1.0, "rewards/chosen": -0.039208024740219116, "rewards/margins": 2.0847160816192627, "rewards/rejected": -2.1239237785339355, "step": 4989 }, { "epoch": 0.58, "learning_rate": 1.293690740957509e-07, "logits/chosen": -2.8651180267333984, "logits/rejected": -2.9935035705566406, "logps/chosen": -78.1316146850586, "logps/rejected": -166.19151306152344, "loss": 0.3983, "rewards/accuracies": 0.75, "rewards/chosen": 0.07077307999134064, "rewards/margins": 1.4879200458526611, "rewards/rejected": -1.417146921157837, "step": 4990 }, { "epoch": 0.58, "learning_rate": 1.2933395762612666e-07, "logits/chosen": -2.452582836151123, "logits/rejected": -2.4943652153015137, "logps/chosen": -332.3829650878906, "logps/rejected": -163.5178985595703, "loss": 0.4641, "rewards/accuracies": 0.75, "rewards/chosen": -0.2305031418800354, "rewards/margins": 1.5296592712402344, "rewards/rejected": -1.760162353515625, "step": 4991 }, { "epoch": 0.58, "learning_rate": 1.292988411565024e-07, "logits/chosen": -3.7815663814544678, "logits/rejected": -3.917253017425537, "logps/chosen": -150.0355224609375, "logps/rejected": -224.300537109375, "loss": 0.7931, "rewards/accuracies": 0.5, "rewards/chosen": -1.0316247940063477, "rewards/margins": 1.3243701457977295, "rewards/rejected": -2.355994939804077, "step": 4992 }, { "epoch": 0.58, "learning_rate": 1.2926372468687814e-07, "logits/chosen": -2.853198528289795, "logits/rejected": -2.635854482650757, "logps/chosen": -531.5020751953125, "logps/rejected": -340.3063659667969, "loss": 0.356, "rewards/accuracies": 0.875, "rewards/chosen": -0.23451195657253265, "rewards/margins": 1.9588029384613037, "rewards/rejected": -2.193315029144287, "step": 4993 }, { "epoch": 0.58, "learning_rate": 1.2922860821725387e-07, "logits/chosen": -2.662381172180176, "logits/rejected": -2.8075764179229736, "logps/chosen": -182.6499481201172, "logps/rejected": -244.21336364746094, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": 0.4673260450363159, "rewards/margins": 3.4276375770568848, "rewards/rejected": -2.9603118896484375, "step": 4994 }, { "epoch": 0.58, "learning_rate": 1.2919349174762965e-07, "logits/chosen": -3.3101654052734375, "logits/rejected": -3.065042734146118, "logps/chosen": -338.4549865722656, "logps/rejected": -168.30606079101562, "loss": 0.5179, "rewards/accuracies": 0.625, "rewards/chosen": -0.34526893496513367, "rewards/margins": 0.9949976205825806, "rewards/rejected": -1.340266466140747, "step": 4995 }, { "epoch": 0.58, "learning_rate": 1.2915837527800538e-07, "logits/chosen": -3.098053455352783, "logits/rejected": -3.0035243034362793, "logps/chosen": -287.24951171875, "logps/rejected": -238.79983520507812, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": -0.5066121220588684, "rewards/margins": 1.2992783784866333, "rewards/rejected": -1.805890440940857, "step": 4996 }, { "epoch": 0.58, "learning_rate": 1.2912325880838113e-07, "logits/chosen": -3.003601551055908, "logits/rejected": -3.206362009048462, "logps/chosen": -266.2685546875, "logps/rejected": -226.38819885253906, "loss": 0.23, "rewards/accuracies": 1.0, "rewards/chosen": 0.1355937123298645, "rewards/margins": 1.8285493850708008, "rewards/rejected": -1.6929556131362915, "step": 4997 }, { "epoch": 0.58, "learning_rate": 1.2908814233875686e-07, "logits/chosen": -2.7873265743255615, "logits/rejected": -2.7481977939605713, "logps/chosen": -217.50782775878906, "logps/rejected": -217.24594116210938, "loss": 0.554, "rewards/accuracies": 0.625, "rewards/chosen": -0.1744399070739746, "rewards/margins": 1.4546840190887451, "rewards/rejected": -1.6291239261627197, "step": 4998 }, { "epoch": 0.58, "learning_rate": 1.2905302586913261e-07, "logits/chosen": -3.1659250259399414, "logits/rejected": -3.040562629699707, "logps/chosen": -401.05560302734375, "logps/rejected": -330.4620666503906, "loss": 0.7975, "rewards/accuracies": 0.5, "rewards/chosen": -0.8107323050498962, "rewards/margins": 0.6146043539047241, "rewards/rejected": -1.4253365993499756, "step": 4999 }, { "epoch": 0.58, "learning_rate": 1.2901790939950837e-07, "logits/chosen": -3.0244407653808594, "logits/rejected": -2.946230888366699, "logps/chosen": -354.1905517578125, "logps/rejected": -312.01837158203125, "loss": 0.7752, "rewards/accuracies": 0.75, "rewards/chosen": -0.22785690426826477, "rewards/margins": 0.7793235182762146, "rewards/rejected": -1.0071804523468018, "step": 5000 }, { "epoch": 0.58, "eval_logits/chosen": -2.837320327758789, "eval_logits/rejected": -2.79986572265625, "eval_logps/chosen": -293.6067199707031, "eval_logps/rejected": -237.07310485839844, "eval_loss": 0.42721131443977356, "eval_rewards/accuracies": 0.8142856955528259, "eval_rewards/chosen": 0.044821999967098236, "eval_rewards/margins": 1.3178679943084717, "eval_rewards/rejected": -1.2730460166931152, "eval_runtime": 32.6061, "eval_samples_per_second": 2.147, "eval_steps_per_second": 1.073, "step": 5000 }, { "epoch": 0.58, "learning_rate": 1.2898279292988412e-07, "logits/chosen": -3.196345567703247, "logits/rejected": -3.465787410736084, "logps/chosen": -172.88491821289062, "logps/rejected": -211.34996032714844, "loss": 0.2597, "rewards/accuracies": 0.875, "rewards/chosen": -0.10982292890548706, "rewards/margins": 1.9138081073760986, "rewards/rejected": -2.0236310958862305, "step": 5001 }, { "epoch": 0.58, "learning_rate": 1.2894767646025985e-07, "logits/chosen": -2.886819362640381, "logits/rejected": -2.9753036499023438, "logps/chosen": -343.15582275390625, "logps/rejected": -163.3333740234375, "loss": 0.5452, "rewards/accuracies": 0.75, "rewards/chosen": -0.5525258183479309, "rewards/margins": 1.1655480861663818, "rewards/rejected": -1.718073844909668, "step": 5002 }, { "epoch": 0.58, "learning_rate": 1.289125599906356e-07, "logits/chosen": -3.0178840160369873, "logits/rejected": -3.0405256748199463, "logps/chosen": -171.6208038330078, "logps/rejected": -252.6309814453125, "loss": 0.2355, "rewards/accuracies": 0.875, "rewards/chosen": 0.33177024126052856, "rewards/margins": 2.49822998046875, "rewards/rejected": -2.166459798812866, "step": 5003 }, { "epoch": 0.58, "learning_rate": 1.2887744352101136e-07, "logits/chosen": -3.2035675048828125, "logits/rejected": -3.2850818634033203, "logps/chosen": -147.7393798828125, "logps/rejected": -265.4011535644531, "loss": 0.2684, "rewards/accuracies": 1.0, "rewards/chosen": 0.2587752640247345, "rewards/margins": 3.088487148284912, "rewards/rejected": -2.8297119140625, "step": 5004 }, { "epoch": 0.58, "learning_rate": 1.2884232705138709e-07, "logits/chosen": -3.3158082962036133, "logits/rejected": -3.358936309814453, "logps/chosen": -437.8317565917969, "logps/rejected": -259.3781433105469, "loss": 0.3682, "rewards/accuracies": 0.625, "rewards/chosen": 0.5269705653190613, "rewards/margins": 2.456503391265869, "rewards/rejected": -1.929532766342163, "step": 5005 }, { "epoch": 0.58, "learning_rate": 1.2880721058176284e-07, "logits/chosen": -2.594208002090454, "logits/rejected": -2.533041000366211, "logps/chosen": -216.81845092773438, "logps/rejected": -167.12594604492188, "loss": 0.2341, "rewards/accuracies": 0.875, "rewards/chosen": 0.8146194219589233, "rewards/margins": 2.0637366771698, "rewards/rejected": -1.2491172552108765, "step": 5006 }, { "epoch": 0.58, "learning_rate": 1.287720941121386e-07, "logits/chosen": -2.6467738151550293, "logits/rejected": -2.8627889156341553, "logps/chosen": -343.0054016113281, "logps/rejected": -226.44276428222656, "loss": 0.5019, "rewards/accuracies": 0.5, "rewards/chosen": 0.07453954219818115, "rewards/margins": 1.3627009391784668, "rewards/rejected": -1.288161277770996, "step": 5007 }, { "epoch": 0.58, "learning_rate": 1.2873697764251435e-07, "logits/chosen": -3.085057020187378, "logits/rejected": -3.11794376373291, "logps/chosen": -385.4532165527344, "logps/rejected": -305.91650390625, "loss": 0.4043, "rewards/accuracies": 0.75, "rewards/chosen": -0.20448081195354462, "rewards/margins": 1.4547417163848877, "rewards/rejected": -1.6592226028442383, "step": 5008 }, { "epoch": 0.58, "learning_rate": 1.2870186117289007e-07, "logits/chosen": -2.9463887214660645, "logits/rejected": -3.1767072677612305, "logps/chosen": -131.1376190185547, "logps/rejected": -215.8402099609375, "loss": 0.25, "rewards/accuracies": 1.0, "rewards/chosen": -0.18317821621894836, "rewards/margins": 2.2946653366088867, "rewards/rejected": -2.4778435230255127, "step": 5009 }, { "epoch": 0.58, "learning_rate": 1.2866674470326583e-07, "logits/chosen": -3.422701597213745, "logits/rejected": -3.350619077682495, "logps/chosen": -360.34503173828125, "logps/rejected": -301.46795654296875, "loss": 0.2086, "rewards/accuracies": 0.875, "rewards/chosen": 0.22531238198280334, "rewards/margins": 2.7226028442382812, "rewards/rejected": -2.4972903728485107, "step": 5010 }, { "epoch": 0.58, "learning_rate": 1.2863162823364156e-07, "logits/chosen": -3.394075632095337, "logits/rejected": -3.5729434490203857, "logps/chosen": -297.84710693359375, "logps/rejected": -318.5992736816406, "loss": 0.2556, "rewards/accuracies": 0.75, "rewards/chosen": 1.0665550231933594, "rewards/margins": 2.5473179817199707, "rewards/rejected": -1.4807628393173218, "step": 5011 }, { "epoch": 0.58, "learning_rate": 1.2859651176401734e-07, "logits/chosen": -3.83728289604187, "logits/rejected": -3.7644519805908203, "logps/chosen": -274.34893798828125, "logps/rejected": -223.5430145263672, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": 0.6592786312103271, "rewards/margins": 2.5636937618255615, "rewards/rejected": -1.9044151306152344, "step": 5012 }, { "epoch": 0.58, "learning_rate": 1.2856139529439306e-07, "logits/chosen": -2.727691650390625, "logits/rejected": -2.5819878578186035, "logps/chosen": -257.12420654296875, "logps/rejected": -235.3209991455078, "loss": 0.4319, "rewards/accuracies": 0.875, "rewards/chosen": 0.24498219788074493, "rewards/margins": 1.3439409732818604, "rewards/rejected": -1.0989587306976318, "step": 5013 }, { "epoch": 0.58, "learning_rate": 1.2852627882476882e-07, "logits/chosen": -2.499825954437256, "logits/rejected": -2.466615676879883, "logps/chosen": -159.66748046875, "logps/rejected": -204.96197509765625, "loss": 0.2702, "rewards/accuracies": 0.875, "rewards/chosen": 0.10663923621177673, "rewards/margins": 1.5640869140625, "rewards/rejected": -1.4574477672576904, "step": 5014 }, { "epoch": 0.58, "learning_rate": 1.2849116235514454e-07, "logits/chosen": -3.636932611465454, "logits/rejected": -3.9565200805664062, "logps/chosen": -139.72499084472656, "logps/rejected": -183.90576171875, "loss": 0.3288, "rewards/accuracies": 0.75, "rewards/chosen": 0.17759914696216583, "rewards/margins": 2.6581737995147705, "rewards/rejected": -2.480574607849121, "step": 5015 }, { "epoch": 0.58, "learning_rate": 1.284560458855203e-07, "logits/chosen": -2.714207172393799, "logits/rejected": -2.934499740600586, "logps/chosen": -375.4806823730469, "logps/rejected": -342.8158264160156, "loss": 0.5292, "rewards/accuracies": 0.75, "rewards/chosen": -0.19260017573833466, "rewards/margins": 0.6767192482948303, "rewards/rejected": -0.8693193793296814, "step": 5016 }, { "epoch": 0.58, "learning_rate": 1.2842092941589605e-07, "logits/chosen": -2.6220529079437256, "logits/rejected": -2.760239601135254, "logps/chosen": -262.85760498046875, "logps/rejected": -237.30552673339844, "loss": 0.502, "rewards/accuracies": 0.875, "rewards/chosen": -0.3737100064754486, "rewards/margins": 0.5180554389953613, "rewards/rejected": -0.8917654752731323, "step": 5017 }, { "epoch": 0.58, "learning_rate": 1.283858129462718e-07, "logits/chosen": -3.041126251220703, "logits/rejected": -3.422842264175415, "logps/chosen": -279.63323974609375, "logps/rejected": -450.98590087890625, "loss": 0.5608, "rewards/accuracies": 0.75, "rewards/chosen": -0.3456031382083893, "rewards/margins": 0.9249062538146973, "rewards/rejected": -1.2705093622207642, "step": 5018 }, { "epoch": 0.58, "learning_rate": 1.2835069647664753e-07, "logits/chosen": -3.0662806034088135, "logits/rejected": -2.848104953765869, "logps/chosen": -291.6426696777344, "logps/rejected": -248.36431884765625, "loss": 0.4162, "rewards/accuracies": 0.875, "rewards/chosen": -0.00980764627456665, "rewards/margins": 1.4155420064926147, "rewards/rejected": -1.4253497123718262, "step": 5019 }, { "epoch": 0.58, "learning_rate": 1.283155800070233e-07, "logits/chosen": -2.800686836242676, "logits/rejected": -2.7071354389190674, "logps/chosen": -325.3921813964844, "logps/rejected": -228.54530334472656, "loss": 0.2382, "rewards/accuracies": 1.0, "rewards/chosen": 0.6018565893173218, "rewards/margins": 2.194550037384033, "rewards/rejected": -1.5926933288574219, "step": 5020 }, { "epoch": 0.58, "learning_rate": 1.2828046353739904e-07, "logits/chosen": -2.2432100772857666, "logits/rejected": -2.250680446624756, "logps/chosen": -487.625244140625, "logps/rejected": -343.1990051269531, "loss": 0.2412, "rewards/accuracies": 1.0, "rewards/chosen": 0.3829605281352997, "rewards/margins": 1.7036561965942383, "rewards/rejected": -1.3206956386566162, "step": 5021 }, { "epoch": 0.58, "learning_rate": 1.2824534706777477e-07, "logits/chosen": -3.2176921367645264, "logits/rejected": -3.1002306938171387, "logps/chosen": -174.8712921142578, "logps/rejected": -260.817138671875, "loss": 0.4185, "rewards/accuracies": 0.875, "rewards/chosen": 0.18640320003032684, "rewards/margins": 2.413351058959961, "rewards/rejected": -2.226947784423828, "step": 5022 }, { "epoch": 0.58, "learning_rate": 1.2821023059815052e-07, "logits/chosen": -3.8915762901306152, "logits/rejected": -3.7809152603149414, "logps/chosen": -310.43829345703125, "logps/rejected": -226.51119995117188, "loss": 0.4476, "rewards/accuracies": 0.75, "rewards/chosen": 0.1441909670829773, "rewards/margins": 1.0664031505584717, "rewards/rejected": -0.9222121238708496, "step": 5023 }, { "epoch": 0.58, "learning_rate": 1.2817511412852628e-07, "logits/chosen": -3.3521389961242676, "logits/rejected": -3.1616625785827637, "logps/chosen": -225.028076171875, "logps/rejected": -292.8263244628906, "loss": 0.6162, "rewards/accuracies": 0.625, "rewards/chosen": -0.22215688228607178, "rewards/margins": 1.2828539609909058, "rewards/rejected": -1.5050108432769775, "step": 5024 }, { "epoch": 0.58, "learning_rate": 1.2813999765890203e-07, "logits/chosen": -2.570096015930176, "logits/rejected": -2.611603260040283, "logps/chosen": -580.6859130859375, "logps/rejected": -350.2803955078125, "loss": 0.2718, "rewards/accuracies": 0.875, "rewards/chosen": 0.7026288509368896, "rewards/margins": 1.7139778137207031, "rewards/rejected": -1.0113489627838135, "step": 5025 }, { "epoch": 0.58, "learning_rate": 1.2810488118927776e-07, "logits/chosen": -2.9622299671173096, "logits/rejected": -2.8956992626190186, "logps/chosen": -233.87106323242188, "logps/rejected": -294.0300598144531, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.5673831105232239, "rewards/margins": 2.315446376800537, "rewards/rejected": -2.882829427719116, "step": 5026 }, { "epoch": 0.58, "learning_rate": 1.280697647196535e-07, "logits/chosen": -2.5463812351226807, "logits/rejected": -2.6340372562408447, "logps/chosen": -441.39752197265625, "logps/rejected": -286.4170227050781, "loss": 0.2949, "rewards/accuracies": 0.875, "rewards/chosen": -0.41872280836105347, "rewards/margins": 1.9148333072662354, "rewards/rejected": -2.3335559368133545, "step": 5027 }, { "epoch": 0.58, "learning_rate": 1.2803464825002926e-07, "logits/chosen": -2.8648364543914795, "logits/rejected": -3.0014097690582275, "logps/chosen": -255.07516479492188, "logps/rejected": -400.2191162109375, "loss": 0.4424, "rewards/accuracies": 0.75, "rewards/chosen": 0.1512080281972885, "rewards/margins": 2.0829834938049316, "rewards/rejected": -1.9317755699157715, "step": 5028 }, { "epoch": 0.58, "learning_rate": 1.2799953178040502e-07, "logits/chosen": -3.2900381088256836, "logits/rejected": -3.147045850753784, "logps/chosen": -201.22671508789062, "logps/rejected": -220.6361846923828, "loss": 0.314, "rewards/accuracies": 0.875, "rewards/chosen": -0.4388643205165863, "rewards/margins": 1.5479562282562256, "rewards/rejected": -1.9868204593658447, "step": 5029 }, { "epoch": 0.58, "learning_rate": 1.2796441531078075e-07, "logits/chosen": -3.60080885887146, "logits/rejected": -3.733241081237793, "logps/chosen": -240.65692138671875, "logps/rejected": -255.76438903808594, "loss": 0.2361, "rewards/accuracies": 0.75, "rewards/chosen": 0.4005383849143982, "rewards/margins": 2.566586971282959, "rewards/rejected": -2.166048526763916, "step": 5030 }, { "epoch": 0.58, "learning_rate": 1.279292988411565e-07, "logits/chosen": -3.122959852218628, "logits/rejected": -3.2530298233032227, "logps/chosen": -201.73202514648438, "logps/rejected": -179.4698944091797, "loss": 0.5353, "rewards/accuracies": 0.625, "rewards/chosen": -0.7513167858123779, "rewards/margins": 1.6176173686981201, "rewards/rejected": -2.368934154510498, "step": 5031 }, { "epoch": 0.58, "learning_rate": 1.2789418237153225e-07, "logits/chosen": -3.2062222957611084, "logits/rejected": -3.208982229232788, "logps/chosen": -123.01377868652344, "logps/rejected": -133.63888549804688, "loss": 0.2888, "rewards/accuracies": 1.0, "rewards/chosen": -0.1813463270664215, "rewards/margins": 1.688930869102478, "rewards/rejected": -1.8702771663665771, "step": 5032 }, { "epoch": 0.58, "learning_rate": 1.2785906590190798e-07, "logits/chosen": -3.83549427986145, "logits/rejected": -3.7102231979370117, "logps/chosen": -224.90518188476562, "logps/rejected": -209.16561889648438, "loss": 0.3337, "rewards/accuracies": 1.0, "rewards/chosen": 0.06444337964057922, "rewards/margins": 1.5488038063049316, "rewards/rejected": -1.4843604564666748, "step": 5033 }, { "epoch": 0.58, "learning_rate": 1.2782394943228374e-07, "logits/chosen": -3.3604860305786133, "logits/rejected": -3.398679733276367, "logps/chosen": -354.6896667480469, "logps/rejected": -321.07269287109375, "loss": 0.2954, "rewards/accuracies": 0.75, "rewards/chosen": 0.05834078788757324, "rewards/margins": 2.6479005813598633, "rewards/rejected": -2.58955979347229, "step": 5034 }, { "epoch": 0.58, "learning_rate": 1.277888329626595e-07, "logits/chosen": -2.6741292476654053, "logits/rejected": -2.771068572998047, "logps/chosen": -317.76690673828125, "logps/rejected": -236.85256958007812, "loss": 0.4228, "rewards/accuracies": 0.75, "rewards/chosen": -0.2403039038181305, "rewards/margins": 1.4570119380950928, "rewards/rejected": -1.6973159313201904, "step": 5035 }, { "epoch": 0.58, "learning_rate": 1.2775371649303524e-07, "logits/chosen": -3.3747763633728027, "logits/rejected": -3.176621437072754, "logps/chosen": -184.29248046875, "logps/rejected": -220.4187469482422, "loss": 0.2022, "rewards/accuracies": 1.0, "rewards/chosen": 0.3587665557861328, "rewards/margins": 2.018613815307617, "rewards/rejected": -1.6598472595214844, "step": 5036 }, { "epoch": 0.58, "learning_rate": 1.2771860002341097e-07, "logits/chosen": -2.7813496589660645, "logits/rejected": -2.5995054244995117, "logps/chosen": -382.8707275390625, "logps/rejected": -281.0979919433594, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": 0.25480368733406067, "rewards/margins": 2.0126893520355225, "rewards/rejected": -1.7578855752944946, "step": 5037 }, { "epoch": 0.58, "learning_rate": 1.2768348355378672e-07, "logits/chosen": -2.7973992824554443, "logits/rejected": -3.121063709259033, "logps/chosen": -276.9651794433594, "logps/rejected": -197.33160400390625, "loss": 0.4069, "rewards/accuracies": 0.75, "rewards/chosen": -0.38399460911750793, "rewards/margins": 1.3929458856582642, "rewards/rejected": -1.7769403457641602, "step": 5038 }, { "epoch": 0.58, "learning_rate": 1.2764836708416245e-07, "logits/chosen": -3.359433650970459, "logits/rejected": -3.6237974166870117, "logps/chosen": -163.86814880371094, "logps/rejected": -303.46124267578125, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": 0.5372501015663147, "rewards/margins": 3.1151843070983887, "rewards/rejected": -2.5779342651367188, "step": 5039 }, { "epoch": 0.58, "learning_rate": 1.2761325061453823e-07, "logits/chosen": -3.3076095581054688, "logits/rejected": -3.6929259300231934, "logps/chosen": -141.2142333984375, "logps/rejected": -267.79583740234375, "loss": 0.2649, "rewards/accuracies": 0.875, "rewards/chosen": 0.1145787239074707, "rewards/margins": 2.82140851020813, "rewards/rejected": -2.706829786300659, "step": 5040 }, { "epoch": 0.58, "learning_rate": 1.2757813414491396e-07, "logits/chosen": -3.026960849761963, "logits/rejected": -3.09399151802063, "logps/chosen": -523.8560180664062, "logps/rejected": -365.1963806152344, "loss": 0.5293, "rewards/accuracies": 0.5, "rewards/chosen": -0.16027438640594482, "rewards/margins": 0.6240905523300171, "rewards/rejected": -0.7843649387359619, "step": 5041 }, { "epoch": 0.58, "learning_rate": 1.275430176752897e-07, "logits/chosen": -3.7586920261383057, "logits/rejected": -3.2695913314819336, "logps/chosen": -382.9727783203125, "logps/rejected": -294.2950439453125, "loss": 0.3162, "rewards/accuracies": 1.0, "rewards/chosen": 0.33849620819091797, "rewards/margins": 1.2544217109680176, "rewards/rejected": -0.9159255027770996, "step": 5042 }, { "epoch": 0.58, "learning_rate": 1.2750790120566544e-07, "logits/chosen": -3.217665910720825, "logits/rejected": -3.3796916007995605, "logps/chosen": -150.95706176757812, "logps/rejected": -327.947021484375, "loss": 0.2167, "rewards/accuracies": 1.0, "rewards/chosen": -0.27542123198509216, "rewards/margins": 2.630263328552246, "rewards/rejected": -2.905684471130371, "step": 5043 }, { "epoch": 0.58, "learning_rate": 1.274727847360412e-07, "logits/chosen": -3.571077823638916, "logits/rejected": -3.3481037616729736, "logps/chosen": -203.13087463378906, "logps/rejected": -212.49935913085938, "loss": 0.997, "rewards/accuracies": 0.5, "rewards/chosen": -0.9830268025398254, "rewards/margins": 1.4347411394119263, "rewards/rejected": -2.4177680015563965, "step": 5044 }, { "epoch": 0.58, "learning_rate": 1.2743766826641695e-07, "logits/chosen": -2.69978666305542, "logits/rejected": -3.1136157512664795, "logps/chosen": -151.898193359375, "logps/rejected": -242.90869140625, "loss": 0.4703, "rewards/accuracies": 0.75, "rewards/chosen": -0.1295502781867981, "rewards/margins": 2.659722328186035, "rewards/rejected": -2.7892727851867676, "step": 5045 }, { "epoch": 0.58, "learning_rate": 1.274025517967927e-07, "logits/chosen": -3.000504493713379, "logits/rejected": -3.0767440795898438, "logps/chosen": -298.09686279296875, "logps/rejected": -349.932373046875, "loss": 0.2609, "rewards/accuracies": 0.875, "rewards/chosen": 0.13690738379955292, "rewards/margins": 2.0991227626800537, "rewards/rejected": -1.962215542793274, "step": 5046 }, { "epoch": 0.58, "learning_rate": 1.2736743532716843e-07, "logits/chosen": -2.9589009284973145, "logits/rejected": -2.848604679107666, "logps/chosen": -301.490478515625, "logps/rejected": -225.5598602294922, "loss": 0.6178, "rewards/accuracies": 0.625, "rewards/chosen": -0.4710807800292969, "rewards/margins": 0.9879388213157654, "rewards/rejected": -1.459019660949707, "step": 5047 }, { "epoch": 0.58, "learning_rate": 1.2733231885754418e-07, "logits/chosen": -3.125692844390869, "logits/rejected": -2.695746898651123, "logps/chosen": -283.38897705078125, "logps/rejected": -255.02989196777344, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": 0.3765089511871338, "rewards/margins": 3.6900973320007324, "rewards/rejected": -3.3135886192321777, "step": 5048 }, { "epoch": 0.58, "learning_rate": 1.2729720238791994e-07, "logits/chosen": -3.4725213050842285, "logits/rejected": -3.8035953044891357, "logps/chosen": -241.58206176757812, "logps/rejected": -209.90032958984375, "loss": 0.3333, "rewards/accuracies": 0.75, "rewards/chosen": 0.6175392270088196, "rewards/margins": 2.5831849575042725, "rewards/rejected": -1.9656457901000977, "step": 5049 }, { "epoch": 0.58, "learning_rate": 1.2726208591829566e-07, "logits/chosen": -3.6497132778167725, "logits/rejected": -3.5948026180267334, "logps/chosen": -180.8606414794922, "logps/rejected": -144.51934814453125, "loss": 0.4962, "rewards/accuracies": 0.875, "rewards/chosen": -0.27254343032836914, "rewards/margins": 1.9792249202728271, "rewards/rejected": -2.2517683506011963, "step": 5050 }, { "epoch": 0.58, "learning_rate": 1.2722696944867142e-07, "logits/chosen": -3.6335620880126953, "logits/rejected": -3.6673502922058105, "logps/chosen": -157.08267211914062, "logps/rejected": -185.21987915039062, "loss": 0.3749, "rewards/accuracies": 0.875, "rewards/chosen": 0.2823788821697235, "rewards/margins": 2.3970425128936768, "rewards/rejected": -2.114663600921631, "step": 5051 }, { "epoch": 0.58, "learning_rate": 1.2719185297904717e-07, "logits/chosen": -3.716050863265991, "logits/rejected": -3.6318862438201904, "logps/chosen": -276.47503662109375, "logps/rejected": -182.00587463378906, "loss": 0.4966, "rewards/accuracies": 0.75, "rewards/chosen": -0.43334800004959106, "rewards/margins": 0.7634434700012207, "rewards/rejected": -1.196791410446167, "step": 5052 }, { "epoch": 0.58, "learning_rate": 1.2715673650942293e-07, "logits/chosen": -4.0758585929870605, "logits/rejected": -3.609835624694824, "logps/chosen": -272.31866455078125, "logps/rejected": -243.53866577148438, "loss": 0.2366, "rewards/accuracies": 0.875, "rewards/chosen": 0.2793155908584595, "rewards/margins": 2.5818521976470947, "rewards/rejected": -2.3025364875793457, "step": 5053 }, { "epoch": 0.58, "learning_rate": 1.2712162003979865e-07, "logits/chosen": -3.664198398590088, "logits/rejected": -3.5196340084075928, "logps/chosen": -253.3223876953125, "logps/rejected": -179.81265258789062, "loss": 0.7124, "rewards/accuracies": 0.75, "rewards/chosen": -0.1484735608100891, "rewards/margins": 1.009164571762085, "rewards/rejected": -1.1576380729675293, "step": 5054 }, { "epoch": 0.58, "learning_rate": 1.270865035701744e-07, "logits/chosen": -3.508932590484619, "logits/rejected": -3.3474268913269043, "logps/chosen": -380.0367736816406, "logps/rejected": -314.1165466308594, "loss": 0.377, "rewards/accuracies": 0.875, "rewards/chosen": -0.14284448325634003, "rewards/margins": 2.476856231689453, "rewards/rejected": -2.6197004318237305, "step": 5055 }, { "epoch": 0.58, "learning_rate": 1.2705138710055013e-07, "logits/chosen": -3.9125895500183105, "logits/rejected": -3.9237680435180664, "logps/chosen": -184.95849609375, "logps/rejected": -173.5776824951172, "loss": 0.616, "rewards/accuracies": 0.75, "rewards/chosen": -0.46787717938423157, "rewards/margins": 0.4966202974319458, "rewards/rejected": -0.964497447013855, "step": 5056 }, { "epoch": 0.58, "learning_rate": 1.2701627063092591e-07, "logits/chosen": -3.0725293159484863, "logits/rejected": -2.801165819168091, "logps/chosen": -288.439208984375, "logps/rejected": -215.64703369140625, "loss": 0.6249, "rewards/accuracies": 0.625, "rewards/chosen": -0.3049061894416809, "rewards/margins": 1.1041843891143799, "rewards/rejected": -1.4090906381607056, "step": 5057 }, { "epoch": 0.58, "learning_rate": 1.2698115416130164e-07, "logits/chosen": -3.1248087882995605, "logits/rejected": -3.2253081798553467, "logps/chosen": -119.39644622802734, "logps/rejected": -144.3485107421875, "loss": 0.4683, "rewards/accuracies": 0.625, "rewards/chosen": -0.3015996217727661, "rewards/margins": 2.0588626861572266, "rewards/rejected": -2.3604624271392822, "step": 5058 }, { "epoch": 0.58, "learning_rate": 1.269460376916774e-07, "logits/chosen": -2.791971445083618, "logits/rejected": -2.7963523864746094, "logps/chosen": -323.49853515625, "logps/rejected": -192.822021484375, "loss": 0.4968, "rewards/accuracies": 0.75, "rewards/chosen": 0.6141326427459717, "rewards/margins": 1.7042126655578613, "rewards/rejected": -1.0900801420211792, "step": 5059 }, { "epoch": 0.58, "learning_rate": 1.2691092122205312e-07, "logits/chosen": -3.592034101486206, "logits/rejected": -3.8011045455932617, "logps/chosen": -236.0926513671875, "logps/rejected": -362.00421142578125, "loss": 0.1288, "rewards/accuracies": 1.0, "rewards/chosen": -0.5348492860794067, "rewards/margins": 2.520763397216797, "rewards/rejected": -3.055612564086914, "step": 5060 }, { "epoch": 0.58, "learning_rate": 1.2687580475242888e-07, "logits/chosen": -2.9570603370666504, "logits/rejected": -2.8587281703948975, "logps/chosen": -179.15188598632812, "logps/rejected": -219.90992736816406, "loss": 0.439, "rewards/accuracies": 0.75, "rewards/chosen": 0.27063390612602234, "rewards/margins": 2.301243543624878, "rewards/rejected": -2.030609607696533, "step": 5061 }, { "epoch": 0.58, "learning_rate": 1.2684068828280463e-07, "logits/chosen": -3.278244972229004, "logits/rejected": -3.2030293941497803, "logps/chosen": -170.67337036132812, "logps/rejected": -204.93435668945312, "loss": 0.4728, "rewards/accuracies": 0.75, "rewards/chosen": -0.530208945274353, "rewards/margins": 1.1478404998779297, "rewards/rejected": -1.6780494451522827, "step": 5062 }, { "epoch": 0.58, "learning_rate": 1.2680557181318039e-07, "logits/chosen": -3.3655543327331543, "logits/rejected": -3.226430892944336, "logps/chosen": -276.15582275390625, "logps/rejected": -237.97308349609375, "loss": 0.3083, "rewards/accuracies": 0.875, "rewards/chosen": 0.12420712411403656, "rewards/margins": 1.7512704133987427, "rewards/rejected": -1.627063274383545, "step": 5063 }, { "epoch": 0.58, "learning_rate": 1.267704553435561e-07, "logits/chosen": -3.6587796211242676, "logits/rejected": -3.214343786239624, "logps/chosen": -201.4375762939453, "logps/rejected": -124.67745208740234, "loss": 0.2668, "rewards/accuracies": 0.875, "rewards/chosen": 1.1228506565093994, "rewards/margins": 1.8152803182601929, "rewards/rejected": -0.6924296617507935, "step": 5064 }, { "epoch": 0.58, "learning_rate": 1.2673533887393187e-07, "logits/chosen": -2.635091781616211, "logits/rejected": -2.7422800064086914, "logps/chosen": -413.92254638671875, "logps/rejected": -427.0927429199219, "loss": 0.4357, "rewards/accuracies": 0.75, "rewards/chosen": 0.266759991645813, "rewards/margins": 1.4970612525939941, "rewards/rejected": -1.2303013801574707, "step": 5065 }, { "epoch": 0.58, "learning_rate": 1.2670022240430762e-07, "logits/chosen": -2.5339131355285645, "logits/rejected": -2.4504597187042236, "logps/chosen": -274.673583984375, "logps/rejected": -346.19049072265625, "loss": 0.5285, "rewards/accuracies": 0.75, "rewards/chosen": -0.20619097352027893, "rewards/margins": 1.6395108699798584, "rewards/rejected": -1.8457016944885254, "step": 5066 }, { "epoch": 0.58, "learning_rate": 1.2666510593468335e-07, "logits/chosen": -2.6928839683532715, "logits/rejected": -2.690260171890259, "logps/chosen": -432.06317138671875, "logps/rejected": -352.77777099609375, "loss": 0.3691, "rewards/accuracies": 0.875, "rewards/chosen": 0.36328935623168945, "rewards/margins": 4.899564266204834, "rewards/rejected": -4.5362749099731445, "step": 5067 }, { "epoch": 0.58, "learning_rate": 1.266299894650591e-07, "logits/chosen": -2.850738286972046, "logits/rejected": -2.9828696250915527, "logps/chosen": -520.4832153320312, "logps/rejected": -397.86468505859375, "loss": 0.3542, "rewards/accuracies": 0.75, "rewards/chosen": -0.15189455449581146, "rewards/margins": 2.4175562858581543, "rewards/rejected": -2.569450616836548, "step": 5068 }, { "epoch": 0.58, "learning_rate": 1.2659487299543486e-07, "logits/chosen": -3.745903491973877, "logits/rejected": -3.9414281845092773, "logps/chosen": -240.80059814453125, "logps/rejected": -315.74066162109375, "loss": 0.4399, "rewards/accuracies": 0.75, "rewards/chosen": -0.17542952299118042, "rewards/margins": 2.731564998626709, "rewards/rejected": -2.906994342803955, "step": 5069 }, { "epoch": 0.58, "learning_rate": 1.265597565258106e-07, "logits/chosen": -2.5919289588928223, "logits/rejected": -3.0560688972473145, "logps/chosen": -247.66384887695312, "logps/rejected": -288.96783447265625, "loss": 0.3724, "rewards/accuracies": 0.75, "rewards/chosen": -0.14901769161224365, "rewards/margins": 1.90613853931427, "rewards/rejected": -2.0551562309265137, "step": 5070 }, { "epoch": 0.58, "learning_rate": 1.2652464005618634e-07, "logits/chosen": -2.4298906326293945, "logits/rejected": -2.5612735748291016, "logps/chosen": -296.75848388671875, "logps/rejected": -280.7152099609375, "loss": 0.6773, "rewards/accuracies": 0.625, "rewards/chosen": -0.4941185712814331, "rewards/margins": 1.4057281017303467, "rewards/rejected": -1.8998465538024902, "step": 5071 }, { "epoch": 0.58, "learning_rate": 1.264895235865621e-07, "logits/chosen": -2.7841343879699707, "logits/rejected": -2.810114860534668, "logps/chosen": -172.5916290283203, "logps/rejected": -232.9892120361328, "loss": 0.4604, "rewards/accuracies": 0.75, "rewards/chosen": 0.2140403836965561, "rewards/margins": 2.355552911758423, "rewards/rejected": -2.141512393951416, "step": 5072 }, { "epoch": 0.58, "learning_rate": 1.2645440711693784e-07, "logits/chosen": -2.7614309787750244, "logits/rejected": -2.7017533779144287, "logps/chosen": -236.35214233398438, "logps/rejected": -265.38330078125, "loss": 0.4667, "rewards/accuracies": 0.75, "rewards/chosen": -0.3127371072769165, "rewards/margins": 1.2616267204284668, "rewards/rejected": -1.5743638277053833, "step": 5073 }, { "epoch": 0.58, "learning_rate": 1.264192906473136e-07, "logits/chosen": -3.6879162788391113, "logits/rejected": -3.875938653945923, "logps/chosen": -343.4891357421875, "logps/rejected": -337.6551208496094, "loss": 0.592, "rewards/accuracies": 0.5, "rewards/chosen": -0.19598700106143951, "rewards/margins": 1.043034553527832, "rewards/rejected": -1.2390215396881104, "step": 5074 }, { "epoch": 0.59, "learning_rate": 1.2638417417768933e-07, "logits/chosen": -3.291682720184326, "logits/rejected": -3.2164254188537598, "logps/chosen": -271.5522766113281, "logps/rejected": -161.20501708984375, "loss": 0.3629, "rewards/accuracies": 1.0, "rewards/chosen": 0.16440042853355408, "rewards/margins": 1.5468573570251465, "rewards/rejected": -1.3824567794799805, "step": 5075 }, { "epoch": 0.59, "learning_rate": 1.2634905770806508e-07, "logits/chosen": -2.965287446975708, "logits/rejected": -3.394080400466919, "logps/chosen": -278.1961364746094, "logps/rejected": -269.2933349609375, "loss": 0.4719, "rewards/accuracies": 0.625, "rewards/chosen": -0.5391241908073425, "rewards/margins": 1.36617112159729, "rewards/rejected": -1.9052952527999878, "step": 5076 }, { "epoch": 0.59, "learning_rate": 1.2631394123844083e-07, "logits/chosen": -2.8547449111938477, "logits/rejected": -3.0556201934814453, "logps/chosen": -305.9022216796875, "logps/rejected": -224.9374237060547, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": 0.6179894804954529, "rewards/margins": 3.270540237426758, "rewards/rejected": -2.6525509357452393, "step": 5077 }, { "epoch": 0.59, "learning_rate": 1.2627882476881656e-07, "logits/chosen": -3.004833698272705, "logits/rejected": -2.725419759750366, "logps/chosen": -198.72137451171875, "logps/rejected": -232.59539794921875, "loss": 0.349, "rewards/accuracies": 0.625, "rewards/chosen": -0.2596345543861389, "rewards/margins": 2.522596597671509, "rewards/rejected": -2.782231330871582, "step": 5078 }, { "epoch": 0.59, "learning_rate": 1.2624370829919231e-07, "logits/chosen": -2.3458752632141113, "logits/rejected": -2.365662097930908, "logps/chosen": -251.01634216308594, "logps/rejected": -245.94439697265625, "loss": 0.5736, "rewards/accuracies": 0.625, "rewards/chosen": -0.36321520805358887, "rewards/margins": 0.9257047176361084, "rewards/rejected": -1.2889199256896973, "step": 5079 }, { "epoch": 0.59, "learning_rate": 1.2620859182956807e-07, "logits/chosen": -2.771125316619873, "logits/rejected": -2.6224145889282227, "logps/chosen": -395.3077087402344, "logps/rejected": -211.20298767089844, "loss": 0.5588, "rewards/accuracies": 0.5, "rewards/chosen": -0.6337928771972656, "rewards/margins": 0.8307833671569824, "rewards/rejected": -1.4645761251449585, "step": 5080 }, { "epoch": 0.59, "learning_rate": 1.2617347535994382e-07, "logits/chosen": -2.65049409866333, "logits/rejected": -2.061173439025879, "logps/chosen": -174.20001220703125, "logps/rejected": -254.60610961914062, "loss": 0.3058, "rewards/accuracies": 1.0, "rewards/chosen": -0.05493832379579544, "rewards/margins": 2.1030774116516113, "rewards/rejected": -2.158015727996826, "step": 5081 }, { "epoch": 0.59, "learning_rate": 1.2613835889031955e-07, "logits/chosen": -3.1570653915405273, "logits/rejected": -3.0731801986694336, "logps/chosen": -245.9629669189453, "logps/rejected": -196.310546875, "loss": 0.4268, "rewards/accuracies": 0.75, "rewards/chosen": -0.49099090695381165, "rewards/margins": 0.9770956039428711, "rewards/rejected": -1.4680863618850708, "step": 5082 }, { "epoch": 0.59, "learning_rate": 1.261032424206953e-07, "logits/chosen": -3.55169939994812, "logits/rejected": -3.4565110206604004, "logps/chosen": -257.9891357421875, "logps/rejected": -342.3607482910156, "loss": 0.7543, "rewards/accuracies": 0.625, "rewards/chosen": -1.1295592784881592, "rewards/margins": 0.920102596282959, "rewards/rejected": -2.049661874771118, "step": 5083 }, { "epoch": 0.59, "learning_rate": 1.2606812595107103e-07, "logits/chosen": -2.961394786834717, "logits/rejected": -3.238657236099243, "logps/chosen": -219.01072692871094, "logps/rejected": -302.8183288574219, "loss": 0.5823, "rewards/accuracies": 0.75, "rewards/chosen": -0.5494468212127686, "rewards/margins": 1.3739678859710693, "rewards/rejected": -1.923414707183838, "step": 5084 }, { "epoch": 0.59, "learning_rate": 1.260330094814468e-07, "logits/chosen": -3.379826784133911, "logits/rejected": -3.6602985858917236, "logps/chosen": -189.70741271972656, "logps/rejected": -310.4198913574219, "loss": 0.2593, "rewards/accuracies": 0.875, "rewards/chosen": 0.21904106438159943, "rewards/margins": 3.63997745513916, "rewards/rejected": -3.420936107635498, "step": 5085 }, { "epoch": 0.59, "learning_rate": 1.2599789301182254e-07, "logits/chosen": -3.975299119949341, "logits/rejected": -3.833606243133545, "logps/chosen": -147.48446655273438, "logps/rejected": -171.4473876953125, "loss": 0.2761, "rewards/accuracies": 0.875, "rewards/chosen": 0.4759521484375, "rewards/margins": 1.5857096910476685, "rewards/rejected": -1.1097575426101685, "step": 5086 }, { "epoch": 0.59, "learning_rate": 1.259627765421983e-07, "logits/chosen": -2.3709487915039062, "logits/rejected": -2.256443500518799, "logps/chosen": -365.72991943359375, "logps/rejected": -353.644287109375, "loss": 0.6442, "rewards/accuracies": 0.75, "rewards/chosen": -0.41030019521713257, "rewards/margins": 0.9880859851837158, "rewards/rejected": -1.3983861207962036, "step": 5087 }, { "epoch": 0.59, "learning_rate": 1.2592766007257402e-07, "logits/chosen": -3.2282509803771973, "logits/rejected": -3.366941213607788, "logps/chosen": -207.70994567871094, "logps/rejected": -237.38648986816406, "loss": 0.4184, "rewards/accuracies": 0.75, "rewards/chosen": -0.054413825273513794, "rewards/margins": 1.370856523513794, "rewards/rejected": -1.4252703189849854, "step": 5088 }, { "epoch": 0.59, "learning_rate": 1.2589254360294977e-07, "logits/chosen": -3.161414623260498, "logits/rejected": -2.958066463470459, "logps/chosen": -234.46304321289062, "logps/rejected": -158.98114013671875, "loss": 0.6888, "rewards/accuracies": 0.5, "rewards/chosen": -0.9040212631225586, "rewards/margins": 0.9930480718612671, "rewards/rejected": -1.8970693349838257, "step": 5089 }, { "epoch": 0.59, "learning_rate": 1.2585742713332553e-07, "logits/chosen": -3.3154497146606445, "logits/rejected": -3.4296998977661133, "logps/chosen": -226.7612762451172, "logps/rejected": -134.78831481933594, "loss": 0.8794, "rewards/accuracies": 0.625, "rewards/chosen": -1.0726767778396606, "rewards/margins": -0.13235639035701752, "rewards/rejected": -0.9403204917907715, "step": 5090 }, { "epoch": 0.59, "learning_rate": 1.2582231066370128e-07, "logits/chosen": -2.833193063735962, "logits/rejected": -3.1983954906463623, "logps/chosen": -358.6549072265625, "logps/rejected": -249.66159057617188, "loss": 0.6428, "rewards/accuracies": 0.625, "rewards/chosen": 0.1164134293794632, "rewards/margins": 0.7592272162437439, "rewards/rejected": -0.6428138017654419, "step": 5091 }, { "epoch": 0.59, "learning_rate": 1.25787194194077e-07, "logits/chosen": -3.092123031616211, "logits/rejected": -3.229050636291504, "logps/chosen": -357.05364990234375, "logps/rejected": -215.02664184570312, "loss": 0.4016, "rewards/accuracies": 0.75, "rewards/chosen": 0.24447083473205566, "rewards/margins": 1.3640906810760498, "rewards/rejected": -1.1196197271347046, "step": 5092 }, { "epoch": 0.59, "learning_rate": 1.2575207772445276e-07, "logits/chosen": -3.471397638320923, "logits/rejected": -3.2719600200653076, "logps/chosen": -243.06199645996094, "logps/rejected": -345.8392333984375, "loss": 0.6261, "rewards/accuracies": 0.75, "rewards/chosen": -0.28838104009628296, "rewards/margins": 0.6559892892837524, "rewards/rejected": -0.9443702697753906, "step": 5093 }, { "epoch": 0.59, "learning_rate": 1.2571696125482852e-07, "logits/chosen": -3.005966901779175, "logits/rejected": -2.860386610031128, "logps/chosen": -125.33768463134766, "logps/rejected": -216.98428344726562, "loss": 0.4373, "rewards/accuracies": 0.75, "rewards/chosen": -0.10151642560958862, "rewards/margins": 1.3632721900939941, "rewards/rejected": -1.4647884368896484, "step": 5094 }, { "epoch": 0.59, "learning_rate": 1.2568184478520424e-07, "logits/chosen": -3.4604074954986572, "logits/rejected": -2.967810869216919, "logps/chosen": -223.649169921875, "logps/rejected": -155.54026794433594, "loss": 0.2758, "rewards/accuracies": 0.875, "rewards/chosen": 0.35581523180007935, "rewards/margins": 1.7625510692596436, "rewards/rejected": -1.4067357778549194, "step": 5095 }, { "epoch": 0.59, "learning_rate": 1.2564672831558e-07, "logits/chosen": -3.1967105865478516, "logits/rejected": -3.034351348876953, "logps/chosen": -281.3988952636719, "logps/rejected": -268.4374084472656, "loss": 0.2854, "rewards/accuracies": 0.875, "rewards/chosen": 0.23806801438331604, "rewards/margins": 2.042045831680298, "rewards/rejected": -1.8039780855178833, "step": 5096 }, { "epoch": 0.59, "learning_rate": 1.2561161184595575e-07, "logits/chosen": -3.43220591545105, "logits/rejected": -3.0261197090148926, "logps/chosen": -249.7388916015625, "logps/rejected": -232.57327270507812, "loss": 0.247, "rewards/accuracies": 1.0, "rewards/chosen": 0.6207653284072876, "rewards/margins": 2.2236382961273193, "rewards/rejected": -1.6028728485107422, "step": 5097 }, { "epoch": 0.59, "learning_rate": 1.255764953763315e-07, "logits/chosen": -3.4867067337036133, "logits/rejected": -3.6398983001708984, "logps/chosen": -233.5177001953125, "logps/rejected": -320.90240478515625, "loss": 0.3254, "rewards/accuracies": 0.875, "rewards/chosen": -0.21252700686454773, "rewards/margins": 3.3579001426696777, "rewards/rejected": -3.570427417755127, "step": 5098 }, { "epoch": 0.59, "learning_rate": 1.2554137890670723e-07, "logits/chosen": -3.1799564361572266, "logits/rejected": -3.0686426162719727, "logps/chosen": -224.99356079101562, "logps/rejected": -191.45594787597656, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": -0.02809259295463562, "rewards/margins": 1.8410484790802002, "rewards/rejected": -1.8691411018371582, "step": 5099 }, { "epoch": 0.59, "learning_rate": 1.2550626243708299e-07, "logits/chosen": -3.4630966186523438, "logits/rejected": -3.6557281017303467, "logps/chosen": -282.0107421875, "logps/rejected": -252.32705688476562, "loss": 0.2895, "rewards/accuracies": 0.875, "rewards/chosen": -0.18214941024780273, "rewards/margins": 2.1229639053344727, "rewards/rejected": -2.3051135540008545, "step": 5100 }, { "epoch": 0.59, "learning_rate": 1.2547114596745871e-07, "logits/chosen": -2.3197498321533203, "logits/rejected": -2.5977914333343506, "logps/chosen": -365.92022705078125, "logps/rejected": -286.7470703125, "loss": 0.3031, "rewards/accuracies": 0.875, "rewards/chosen": -0.40431421995162964, "rewards/margins": 1.635265588760376, "rewards/rejected": -2.0395798683166504, "step": 5101 }, { "epoch": 0.59, "learning_rate": 1.254360294978345e-07, "logits/chosen": -3.2668089866638184, "logits/rejected": -3.0273594856262207, "logps/chosen": -168.8604736328125, "logps/rejected": -205.32867431640625, "loss": 0.7513, "rewards/accuracies": 0.625, "rewards/chosen": -0.614285945892334, "rewards/margins": 0.31139981746673584, "rewards/rejected": -0.9256857633590698, "step": 5102 }, { "epoch": 0.59, "learning_rate": 1.2540091302821022e-07, "logits/chosen": -3.794553518295288, "logits/rejected": -3.7923269271850586, "logps/chosen": -165.3007049560547, "logps/rejected": -200.82066345214844, "loss": 0.2393, "rewards/accuracies": 0.875, "rewards/chosen": 0.39332714676856995, "rewards/margins": 2.1092162132263184, "rewards/rejected": -1.7158890962600708, "step": 5103 }, { "epoch": 0.59, "learning_rate": 1.2536579655858598e-07, "logits/chosen": -3.2049660682678223, "logits/rejected": -3.161163330078125, "logps/chosen": -248.8014373779297, "logps/rejected": -375.4140625, "loss": 0.532, "rewards/accuracies": 0.75, "rewards/chosen": 0.07316935807466507, "rewards/margins": 1.8015270233154297, "rewards/rejected": -1.7283576726913452, "step": 5104 }, { "epoch": 0.59, "learning_rate": 1.253306800889617e-07, "logits/chosen": -2.981464385986328, "logits/rejected": -2.7328383922576904, "logps/chosen": -418.39544677734375, "logps/rejected": -282.54193115234375, "loss": 0.7656, "rewards/accuracies": 0.375, "rewards/chosen": -0.5402126908302307, "rewards/margins": 0.11329154670238495, "rewards/rejected": -0.6535042524337769, "step": 5105 }, { "epoch": 0.59, "learning_rate": 1.2529556361933746e-07, "logits/chosen": -2.722846508026123, "logits/rejected": -3.145650863647461, "logps/chosen": -397.30169677734375, "logps/rejected": -251.1290283203125, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": 0.5785143375396729, "rewards/margins": 2.051023244857788, "rewards/rejected": -1.4725090265274048, "step": 5106 }, { "epoch": 0.59, "learning_rate": 1.252604471497132e-07, "logits/chosen": -2.6955244541168213, "logits/rejected": -3.220076084136963, "logps/chosen": -172.3206787109375, "logps/rejected": -182.2061767578125, "loss": 0.2245, "rewards/accuracies": 1.0, "rewards/chosen": 0.11834937334060669, "rewards/margins": 2.2305028438568115, "rewards/rejected": -2.1121532917022705, "step": 5107 }, { "epoch": 0.59, "learning_rate": 1.2522533068008896e-07, "logits/chosen": -3.074620246887207, "logits/rejected": -2.980717182159424, "logps/chosen": -319.91424560546875, "logps/rejected": -327.15728759765625, "loss": 0.4391, "rewards/accuracies": 0.875, "rewards/chosen": 0.4174458384513855, "rewards/margins": 1.3168244361877441, "rewards/rejected": -0.8993785977363586, "step": 5108 }, { "epoch": 0.59, "learning_rate": 1.251902142104647e-07, "logits/chosen": -3.1302223205566406, "logits/rejected": -3.093503475189209, "logps/chosen": -303.3023376464844, "logps/rejected": -290.64898681640625, "loss": 0.5396, "rewards/accuracies": 0.625, "rewards/chosen": -0.5212228298187256, "rewards/margins": 0.6032912135124207, "rewards/rejected": -1.1245139837265015, "step": 5109 }, { "epoch": 0.59, "learning_rate": 1.2515509774084045e-07, "logits/chosen": -2.592440128326416, "logits/rejected": -2.6826672554016113, "logps/chosen": -223.41751098632812, "logps/rejected": -270.8307800292969, "loss": 0.2948, "rewards/accuracies": 0.875, "rewards/chosen": -0.6458752155303955, "rewards/margins": 2.5807454586029053, "rewards/rejected": -3.22662091255188, "step": 5110 }, { "epoch": 0.59, "learning_rate": 1.251199812712162e-07, "logits/chosen": -2.7077412605285645, "logits/rejected": -2.7753868103027344, "logps/chosen": -276.3486022949219, "logps/rejected": -285.5839538574219, "loss": 0.3592, "rewards/accuracies": 1.0, "rewards/chosen": -0.10496076941490173, "rewards/margins": 1.5069990158081055, "rewards/rejected": -1.6119598150253296, "step": 5111 }, { "epoch": 0.59, "learning_rate": 1.2508486480159193e-07, "logits/chosen": -3.3332247734069824, "logits/rejected": -3.0399951934814453, "logps/chosen": -233.8594512939453, "logps/rejected": -191.0212860107422, "loss": 0.2969, "rewards/accuracies": 0.75, "rewards/chosen": -0.2931063771247864, "rewards/margins": 2.3192198276519775, "rewards/rejected": -2.612326145172119, "step": 5112 }, { "epoch": 0.59, "learning_rate": 1.2504974833196768e-07, "logits/chosen": -2.8460159301757812, "logits/rejected": -2.6648716926574707, "logps/chosen": -316.8892822265625, "logps/rejected": -206.99224853515625, "loss": 0.3231, "rewards/accuracies": 0.875, "rewards/chosen": -0.04325654357671738, "rewards/margins": 1.5111477375030518, "rewards/rejected": -1.5544042587280273, "step": 5113 }, { "epoch": 0.59, "learning_rate": 1.2501463186234343e-07, "logits/chosen": -3.2239773273468018, "logits/rejected": -2.935886859893799, "logps/chosen": -262.8858337402344, "logps/rejected": -238.65518188476562, "loss": 0.4349, "rewards/accuracies": 0.625, "rewards/chosen": -0.06051987409591675, "rewards/margins": 1.341530442237854, "rewards/rejected": -1.402050256729126, "step": 5114 }, { "epoch": 0.59, "learning_rate": 1.249795153927192e-07, "logits/chosen": -3.9414167404174805, "logits/rejected": -3.9088540077209473, "logps/chosen": -262.698486328125, "logps/rejected": -351.52734375, "loss": 0.3499, "rewards/accuracies": 0.875, "rewards/chosen": -1.0574973821640015, "rewards/margins": 2.0938568115234375, "rewards/rejected": -3.1513540744781494, "step": 5115 }, { "epoch": 0.59, "learning_rate": 1.2494439892309492e-07, "logits/chosen": -2.84096097946167, "logits/rejected": -3.148622989654541, "logps/chosen": -284.6800231933594, "logps/rejected": -286.5943298339844, "loss": 0.3847, "rewards/accuracies": 0.875, "rewards/chosen": -0.03002943843603134, "rewards/margins": 1.3376684188842773, "rewards/rejected": -1.3676979541778564, "step": 5116 }, { "epoch": 0.59, "learning_rate": 1.2490928245347067e-07, "logits/chosen": -2.510728120803833, "logits/rejected": -2.775230884552002, "logps/chosen": -276.8660888671875, "logps/rejected": -182.60079956054688, "loss": 0.6986, "rewards/accuracies": 0.625, "rewards/chosen": -0.5546553134918213, "rewards/margins": 0.4451044201850891, "rewards/rejected": -0.9997596740722656, "step": 5117 }, { "epoch": 0.59, "learning_rate": 1.2487416598384642e-07, "logits/chosen": -3.403625726699829, "logits/rejected": -3.6277008056640625, "logps/chosen": -228.04000854492188, "logps/rejected": -214.87258911132812, "loss": 0.1237, "rewards/accuracies": 1.0, "rewards/chosen": 0.43365222215652466, "rewards/margins": 3.4498209953308105, "rewards/rejected": -3.0161688327789307, "step": 5118 }, { "epoch": 0.59, "learning_rate": 1.2483904951422218e-07, "logits/chosen": -3.393980026245117, "logits/rejected": -3.198084831237793, "logps/chosen": -311.3647155761719, "logps/rejected": -211.47418212890625, "loss": 0.3923, "rewards/accuracies": 1.0, "rewards/chosen": -0.0815800279378891, "rewards/margins": 0.7866491675376892, "rewards/rejected": -0.8682292103767395, "step": 5119 }, { "epoch": 0.59, "learning_rate": 1.248039330445979e-07, "logits/chosen": -2.5145881175994873, "logits/rejected": -2.6762452125549316, "logps/chosen": -213.59185791015625, "logps/rejected": -144.87527465820312, "loss": 0.3309, "rewards/accuracies": 1.0, "rewards/chosen": 0.22275319695472717, "rewards/margins": 1.267820119857788, "rewards/rejected": -1.0450668334960938, "step": 5120 }, { "epoch": 0.59, "learning_rate": 1.2476881657497366e-07, "logits/chosen": -2.9267120361328125, "logits/rejected": -2.855530261993408, "logps/chosen": -137.56900024414062, "logps/rejected": -231.34202575683594, "loss": 0.2916, "rewards/accuracies": 0.875, "rewards/chosen": 0.16564035415649414, "rewards/margins": 2.381357192993164, "rewards/rejected": -2.21571683883667, "step": 5121 }, { "epoch": 0.59, "learning_rate": 1.247337001053494e-07, "logits/chosen": -3.0637199878692627, "logits/rejected": -3.181149482727051, "logps/chosen": -273.14910888671875, "logps/rejected": -292.7840576171875, "loss": 0.4791, "rewards/accuracies": 0.625, "rewards/chosen": -0.25485843420028687, "rewards/margins": 1.5640875101089478, "rewards/rejected": -1.8189458847045898, "step": 5122 }, { "epoch": 0.59, "learning_rate": 1.2469858363572514e-07, "logits/chosen": -2.9009571075439453, "logits/rejected": -2.716930389404297, "logps/chosen": -408.74090576171875, "logps/rejected": -416.535400390625, "loss": 0.6914, "rewards/accuracies": 0.75, "rewards/chosen": -0.1782316118478775, "rewards/margins": 1.0850670337677002, "rewards/rejected": -1.2632986307144165, "step": 5123 }, { "epoch": 0.59, "learning_rate": 1.246634671661009e-07, "logits/chosen": -3.664851427078247, "logits/rejected": -4.0036187171936035, "logps/chosen": -102.59632873535156, "logps/rejected": -219.96957397460938, "loss": 0.3498, "rewards/accuracies": 0.875, "rewards/chosen": 0.08753354847431183, "rewards/margins": 1.4200209379196167, "rewards/rejected": -1.3324873447418213, "step": 5124 }, { "epoch": 0.59, "learning_rate": 1.2462835069647665e-07, "logits/chosen": -3.18683123588562, "logits/rejected": -3.395913600921631, "logps/chosen": -298.7718811035156, "logps/rejected": -251.67849731445312, "loss": 0.2347, "rewards/accuracies": 0.875, "rewards/chosen": 0.7214133143424988, "rewards/margins": 3.402377128601074, "rewards/rejected": -2.6809637546539307, "step": 5125 }, { "epoch": 0.59, "learning_rate": 1.245932342268524e-07, "logits/chosen": -2.568044424057007, "logits/rejected": -2.509856700897217, "logps/chosen": -209.97274780273438, "logps/rejected": -280.86871337890625, "loss": 0.3376, "rewards/accuracies": 1.0, "rewards/chosen": -0.16808778047561646, "rewards/margins": 1.1652448177337646, "rewards/rejected": -1.3333325386047363, "step": 5126 }, { "epoch": 0.59, "learning_rate": 1.2455811775722813e-07, "logits/chosen": -3.292430877685547, "logits/rejected": -3.012542963027954, "logps/chosen": -285.42822265625, "logps/rejected": -222.84686279296875, "loss": 0.6882, "rewards/accuracies": 0.625, "rewards/chosen": -1.3531725406646729, "rewards/margins": 0.7035712003707886, "rewards/rejected": -2.056743621826172, "step": 5127 }, { "epoch": 0.59, "learning_rate": 1.2452300128760388e-07, "logits/chosen": -2.6094465255737305, "logits/rejected": -2.8407602310180664, "logps/chosen": -383.1266784667969, "logps/rejected": -309.13916015625, "loss": 0.2689, "rewards/accuracies": 0.875, "rewards/chosen": 0.28032296895980835, "rewards/margins": 2.163501024246216, "rewards/rejected": -1.8831779956817627, "step": 5128 }, { "epoch": 0.59, "learning_rate": 1.244878848179796e-07, "logits/chosen": -2.8577542304992676, "logits/rejected": -2.7162580490112305, "logps/chosen": -268.920166015625, "logps/rejected": -358.79058837890625, "loss": 0.5009, "rewards/accuracies": 0.75, "rewards/chosen": 0.05660443753004074, "rewards/margins": 2.738135814666748, "rewards/rejected": -2.6815311908721924, "step": 5129 }, { "epoch": 0.59, "learning_rate": 1.244527683483554e-07, "logits/chosen": -2.692230224609375, "logits/rejected": -3.1477882862091064, "logps/chosen": -215.27191162109375, "logps/rejected": -237.44729614257812, "loss": 0.4166, "rewards/accuracies": 0.75, "rewards/chosen": 0.4310786724090576, "rewards/margins": 1.7590134143829346, "rewards/rejected": -1.327934980392456, "step": 5130 }, { "epoch": 0.59, "learning_rate": 1.2441765187873112e-07, "logits/chosen": -2.5057621002197266, "logits/rejected": -2.8244967460632324, "logps/chosen": -161.97804260253906, "logps/rejected": -193.97213745117188, "loss": 0.3752, "rewards/accuracies": 0.875, "rewards/chosen": 0.6203717589378357, "rewards/margins": 1.5864571332931519, "rewards/rejected": -0.9660854339599609, "step": 5131 }, { "epoch": 0.59, "learning_rate": 1.2438253540910687e-07, "logits/chosen": -2.2675700187683105, "logits/rejected": -2.3567779064178467, "logps/chosen": -418.916015625, "logps/rejected": -200.3754425048828, "loss": 0.3388, "rewards/accuracies": 0.875, "rewards/chosen": 0.10373665392398834, "rewards/margins": 1.4681721925735474, "rewards/rejected": -1.3644354343414307, "step": 5132 }, { "epoch": 0.59, "learning_rate": 1.243474189394826e-07, "logits/chosen": -2.542189836502075, "logits/rejected": -2.658172845840454, "logps/chosen": -282.65771484375, "logps/rejected": -216.7683563232422, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": 0.5716956853866577, "rewards/margins": 2.353724479675293, "rewards/rejected": -1.7820289134979248, "step": 5133 }, { "epoch": 0.59, "learning_rate": 1.2431230246985835e-07, "logits/chosen": -2.9660253524780273, "logits/rejected": -2.8505380153656006, "logps/chosen": -395.95263671875, "logps/rejected": -235.45089721679688, "loss": 0.6385, "rewards/accuracies": 0.75, "rewards/chosen": -0.4191504120826721, "rewards/margins": 0.9702684879302979, "rewards/rejected": -1.3894188404083252, "step": 5134 }, { "epoch": 0.59, "learning_rate": 1.242771860002341e-07, "logits/chosen": -3.4007906913757324, "logits/rejected": -3.733142614364624, "logps/chosen": -315.6585388183594, "logps/rejected": -278.4137878417969, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": 0.005838632583618164, "rewards/margins": 2.1229376792907715, "rewards/rejected": -2.1170990467071533, "step": 5135 }, { "epoch": 0.59, "learning_rate": 1.2424206953060986e-07, "logits/chosen": -3.526848793029785, "logits/rejected": -3.306994915008545, "logps/chosen": -286.5155944824219, "logps/rejected": -170.82574462890625, "loss": 0.4011, "rewards/accuracies": 0.875, "rewards/chosen": 0.20262190699577332, "rewards/margins": 0.9343003034591675, "rewards/rejected": -0.7316783666610718, "step": 5136 }, { "epoch": 0.59, "learning_rate": 1.242069530609856e-07, "logits/chosen": -2.4026148319244385, "logits/rejected": -2.529700756072998, "logps/chosen": -306.0565185546875, "logps/rejected": -346.70477294921875, "loss": 0.3879, "rewards/accuracies": 0.75, "rewards/chosen": -0.10417544841766357, "rewards/margins": 2.3160579204559326, "rewards/rejected": -2.4202332496643066, "step": 5137 }, { "epoch": 0.59, "learning_rate": 1.2417183659136134e-07, "logits/chosen": -3.093322515487671, "logits/rejected": -3.5482943058013916, "logps/chosen": -265.547607421875, "logps/rejected": -286.43084716796875, "loss": 0.3587, "rewards/accuracies": 0.875, "rewards/chosen": -0.06096839904785156, "rewards/margins": 1.304715871810913, "rewards/rejected": -1.3656842708587646, "step": 5138 }, { "epoch": 0.59, "learning_rate": 1.241367201217371e-07, "logits/chosen": -3.9022932052612305, "logits/rejected": -3.9884281158447266, "logps/chosen": -199.88873291015625, "logps/rejected": -164.64747619628906, "loss": 0.5254, "rewards/accuracies": 0.625, "rewards/chosen": -0.4884019196033478, "rewards/margins": 1.1995644569396973, "rewards/rejected": -1.6879663467407227, "step": 5139 }, { "epoch": 0.59, "learning_rate": 1.2410160365211282e-07, "logits/chosen": -3.5390703678131104, "logits/rejected": -3.4585611820220947, "logps/chosen": -341.9511413574219, "logps/rejected": -354.1522521972656, "loss": 0.2497, "rewards/accuracies": 0.875, "rewards/chosen": 0.07511122524738312, "rewards/margins": 2.7844161987304688, "rewards/rejected": -2.7093048095703125, "step": 5140 }, { "epoch": 0.59, "learning_rate": 1.2406648718248858e-07, "logits/chosen": -3.0640769004821777, "logits/rejected": -3.085679531097412, "logps/chosen": -193.04376220703125, "logps/rejected": -213.20973205566406, "loss": 0.4788, "rewards/accuracies": 0.75, "rewards/chosen": -0.3558647334575653, "rewards/margins": 1.142473816871643, "rewards/rejected": -1.4983384609222412, "step": 5141 }, { "epoch": 0.59, "learning_rate": 1.2403137071286433e-07, "logits/chosen": -2.8975396156311035, "logits/rejected": -3.4024884700775146, "logps/chosen": -163.31744384765625, "logps/rejected": -251.74160766601562, "loss": 0.3609, "rewards/accuracies": 0.75, "rewards/chosen": -0.06632895767688751, "rewards/margins": 1.6642582416534424, "rewards/rejected": -1.7305872440338135, "step": 5142 }, { "epoch": 0.59, "learning_rate": 1.2399625424324008e-07, "logits/chosen": -3.1446986198425293, "logits/rejected": -3.116234302520752, "logps/chosen": -221.79779052734375, "logps/rejected": -340.4810791015625, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": 0.039515942335128784, "rewards/margins": 2.5974526405334473, "rewards/rejected": -2.557936668395996, "step": 5143 }, { "epoch": 0.59, "learning_rate": 1.239611377736158e-07, "logits/chosen": -3.0500571727752686, "logits/rejected": -2.977799892425537, "logps/chosen": -420.51318359375, "logps/rejected": -341.87811279296875, "loss": 0.7214, "rewards/accuracies": 0.875, "rewards/chosen": -0.19016128778457642, "rewards/margins": 1.105961561203003, "rewards/rejected": -1.2961229085922241, "step": 5144 }, { "epoch": 0.59, "learning_rate": 1.2392602130399157e-07, "logits/chosen": -2.2986419200897217, "logits/rejected": -2.478050470352173, "logps/chosen": -287.5576171875, "logps/rejected": -286.1418151855469, "loss": 0.8768, "rewards/accuracies": 0.75, "rewards/chosen": -0.2835155129432678, "rewards/margins": 0.39522460103034973, "rewards/rejected": -0.6787400841712952, "step": 5145 }, { "epoch": 0.59, "learning_rate": 1.238909048343673e-07, "logits/chosen": -2.921954870223999, "logits/rejected": -3.044980049133301, "logps/chosen": -358.2756652832031, "logps/rejected": -351.0960693359375, "loss": 0.5455, "rewards/accuracies": 0.625, "rewards/chosen": -0.5187159776687622, "rewards/margins": 0.9334307909011841, "rewards/rejected": -1.4521467685699463, "step": 5146 }, { "epoch": 0.59, "learning_rate": 1.2385578836474307e-07, "logits/chosen": -2.869605779647827, "logits/rejected": -3.122382879257202, "logps/chosen": -361.6567687988281, "logps/rejected": -172.15972900390625, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": 0.2931489050388336, "rewards/margins": 2.1417300701141357, "rewards/rejected": -1.848581314086914, "step": 5147 }, { "epoch": 0.59, "learning_rate": 1.238206718951188e-07, "logits/chosen": -2.826767921447754, "logits/rejected": -2.8080570697784424, "logps/chosen": -316.5094909667969, "logps/rejected": -220.82273864746094, "loss": 0.2032, "rewards/accuracies": 1.0, "rewards/chosen": 0.009382015094161034, "rewards/margins": 1.6772688627243042, "rewards/rejected": -1.6678868532180786, "step": 5148 }, { "epoch": 0.59, "learning_rate": 1.2378555542549455e-07, "logits/chosen": -2.4643707275390625, "logits/rejected": -2.3738160133361816, "logps/chosen": -230.30197143554688, "logps/rejected": -400.03924560546875, "loss": 0.4456, "rewards/accuracies": 0.75, "rewards/chosen": 0.19337430596351624, "rewards/margins": 1.6614190340042114, "rewards/rejected": -1.4680447578430176, "step": 5149 }, { "epoch": 0.59, "learning_rate": 1.2375043895587028e-07, "logits/chosen": -3.0783121585845947, "logits/rejected": -2.9951138496398926, "logps/chosen": -311.6236572265625, "logps/rejected": -331.98797607421875, "loss": 0.201, "rewards/accuracies": 1.0, "rewards/chosen": -0.4024505913257599, "rewards/margins": 2.564390182495117, "rewards/rejected": -2.9668407440185547, "step": 5150 }, { "epoch": 0.59, "learning_rate": 1.2371532248624604e-07, "logits/chosen": -3.188065528869629, "logits/rejected": -3.387887716293335, "logps/chosen": -163.85888671875, "logps/rejected": -191.5136260986328, "loss": 0.391, "rewards/accuracies": 0.75, "rewards/chosen": -0.3803991377353668, "rewards/margins": 1.5723127126693726, "rewards/rejected": -1.952711820602417, "step": 5151 }, { "epoch": 0.59, "learning_rate": 1.236802060166218e-07, "logits/chosen": -2.56868052482605, "logits/rejected": -2.7470569610595703, "logps/chosen": -264.04254150390625, "logps/rejected": -417.3427734375, "loss": 0.5542, "rewards/accuracies": 0.625, "rewards/chosen": -0.43324828147888184, "rewards/margins": 0.6258847117424011, "rewards/rejected": -1.0591329336166382, "step": 5152 }, { "epoch": 0.59, "learning_rate": 1.2364508954699754e-07, "logits/chosen": -3.365210771560669, "logits/rejected": -3.1227645874023438, "logps/chosen": -262.933837890625, "logps/rejected": -336.25677490234375, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": -0.3530784547328949, "rewards/margins": 3.0454087257385254, "rewards/rejected": -3.398487091064453, "step": 5153 }, { "epoch": 0.59, "learning_rate": 1.2360997307737327e-07, "logits/chosen": -3.9119112491607666, "logits/rejected": -3.6367697715759277, "logps/chosen": -184.5072021484375, "logps/rejected": -171.3543701171875, "loss": 0.4592, "rewards/accuracies": 0.75, "rewards/chosen": -0.8210390210151672, "rewards/margins": 1.0980569124221802, "rewards/rejected": -1.9190958738327026, "step": 5154 }, { "epoch": 0.59, "learning_rate": 1.2357485660774903e-07, "logits/chosen": -2.72275972366333, "logits/rejected": -2.8188462257385254, "logps/chosen": -227.47532653808594, "logps/rejected": -299.435546875, "loss": 0.1807, "rewards/accuracies": 0.875, "rewards/chosen": 0.2197084128856659, "rewards/margins": 2.520461082458496, "rewards/rejected": -2.3007524013519287, "step": 5155 }, { "epoch": 0.59, "learning_rate": 1.2353974013812478e-07, "logits/chosen": -3.1895558834075928, "logits/rejected": -3.1331796646118164, "logps/chosen": -288.162109375, "logps/rejected": -213.41281127929688, "loss": 0.6024, "rewards/accuracies": 0.75, "rewards/chosen": -0.5914193987846375, "rewards/margins": 0.756540834903717, "rewards/rejected": -1.3479602336883545, "step": 5156 }, { "epoch": 0.59, "learning_rate": 1.235046236685005e-07, "logits/chosen": -3.620007038116455, "logits/rejected": -3.8738200664520264, "logps/chosen": -141.0775909423828, "logps/rejected": -243.56143188476562, "loss": 0.211, "rewards/accuracies": 1.0, "rewards/chosen": -0.4806353449821472, "rewards/margins": 2.7707862854003906, "rewards/rejected": -3.2514216899871826, "step": 5157 }, { "epoch": 0.59, "learning_rate": 1.2346950719887626e-07, "logits/chosen": -3.41267991065979, "logits/rejected": -3.5527589321136475, "logps/chosen": -370.16900634765625, "logps/rejected": -284.5588073730469, "loss": 0.2764, "rewards/accuracies": 0.875, "rewards/chosen": -0.2605476379394531, "rewards/margins": 2.0956666469573975, "rewards/rejected": -2.3562142848968506, "step": 5158 }, { "epoch": 0.59, "learning_rate": 1.2343439072925201e-07, "logits/chosen": -3.213653087615967, "logits/rejected": -3.373002052307129, "logps/chosen": -384.2462463378906, "logps/rejected": -295.5800476074219, "loss": 0.6025, "rewards/accuracies": 0.625, "rewards/chosen": -0.032629773020744324, "rewards/margins": 1.5159854888916016, "rewards/rejected": -1.5486153364181519, "step": 5159 }, { "epoch": 0.59, "learning_rate": 1.2339927425962777e-07, "logits/chosen": -3.0373315811157227, "logits/rejected": -3.1449544429779053, "logps/chosen": -249.20455932617188, "logps/rejected": -237.3478240966797, "loss": 0.8793, "rewards/accuracies": 0.75, "rewards/chosen": -0.3325718939304352, "rewards/margins": 0.3562734127044678, "rewards/rejected": -0.6888452768325806, "step": 5160 }, { "epoch": 0.59, "learning_rate": 1.233641577900035e-07, "logits/chosen": -3.1594860553741455, "logits/rejected": -3.129887580871582, "logps/chosen": -275.6136779785156, "logps/rejected": -245.92340087890625, "loss": 0.4689, "rewards/accuracies": 0.75, "rewards/chosen": -0.040759071707725525, "rewards/margins": 1.006184697151184, "rewards/rejected": -1.0469439029693604, "step": 5161 }, { "epoch": 0.6, "learning_rate": 1.2332904132037925e-07, "logits/chosen": -2.2091431617736816, "logits/rejected": -2.653167724609375, "logps/chosen": -444.1913146972656, "logps/rejected": -193.44659423828125, "loss": 0.4206, "rewards/accuracies": 0.75, "rewards/chosen": -0.037239450961351395, "rewards/margins": 1.749734878540039, "rewards/rejected": -1.7869744300842285, "step": 5162 }, { "epoch": 0.6, "learning_rate": 1.23293924850755e-07, "logits/chosen": -2.809598445892334, "logits/rejected": -2.874854564666748, "logps/chosen": -466.6560974121094, "logps/rejected": -404.8394470214844, "loss": 0.2971, "rewards/accuracies": 0.875, "rewards/chosen": 0.6390507221221924, "rewards/margins": 3.0227246284484863, "rewards/rejected": -2.383673906326294, "step": 5163 }, { "epoch": 0.6, "learning_rate": 1.2325880838113076e-07, "logits/chosen": -2.729487895965576, "logits/rejected": -2.9293627738952637, "logps/chosen": -500.4903564453125, "logps/rejected": -310.22930908203125, "loss": 0.1662, "rewards/accuracies": 1.0, "rewards/chosen": 0.25695616006851196, "rewards/margins": 2.245103597640991, "rewards/rejected": -1.9881473779678345, "step": 5164 }, { "epoch": 0.6, "learning_rate": 1.2322369191150648e-07, "logits/chosen": -2.759629011154175, "logits/rejected": -2.6159956455230713, "logps/chosen": -494.8719482421875, "logps/rejected": -302.10479736328125, "loss": 0.3733, "rewards/accuracies": 0.875, "rewards/chosen": -0.09541263431310654, "rewards/margins": 1.8479219675064087, "rewards/rejected": -1.9433344602584839, "step": 5165 }, { "epoch": 0.6, "learning_rate": 1.2318857544188224e-07, "logits/chosen": -3.351102352142334, "logits/rejected": -3.1951398849487305, "logps/chosen": -239.81430053710938, "logps/rejected": -227.35720825195312, "loss": 0.3937, "rewards/accuracies": 0.875, "rewards/chosen": 0.08247008919715881, "rewards/margins": 2.307767391204834, "rewards/rejected": -2.225297212600708, "step": 5166 }, { "epoch": 0.6, "learning_rate": 1.23153458972258e-07, "logits/chosen": -2.204308032989502, "logits/rejected": -2.259019374847412, "logps/chosen": -370.3982238769531, "logps/rejected": -269.4747009277344, "loss": 0.2341, "rewards/accuracies": 1.0, "rewards/chosen": 0.45790329575538635, "rewards/margins": 2.18782901763916, "rewards/rejected": -1.7299258708953857, "step": 5167 }, { "epoch": 0.6, "learning_rate": 1.2311834250263372e-07, "logits/chosen": -2.803575038909912, "logits/rejected": -2.5925161838531494, "logps/chosen": -417.7294006347656, "logps/rejected": -300.6839599609375, "loss": 0.6917, "rewards/accuracies": 0.75, "rewards/chosen": 0.3312559425830841, "rewards/margins": 1.2431349754333496, "rewards/rejected": -0.9118790030479431, "step": 5168 }, { "epoch": 0.6, "learning_rate": 1.2308322603300947e-07, "logits/chosen": -3.617661237716675, "logits/rejected": -3.525425434112549, "logps/chosen": -283.5308837890625, "logps/rejected": -181.88235473632812, "loss": 0.4695, "rewards/accuracies": 0.75, "rewards/chosen": -0.32231175899505615, "rewards/margins": 0.8831068277359009, "rewards/rejected": -1.205418586730957, "step": 5169 }, { "epoch": 0.6, "learning_rate": 1.2304810956338523e-07, "logits/chosen": -3.0581657886505127, "logits/rejected": -2.835472583770752, "logps/chosen": -176.3271026611328, "logps/rejected": -164.31805419921875, "loss": 0.4601, "rewards/accuracies": 0.625, "rewards/chosen": -0.5914827585220337, "rewards/margins": 1.153172492980957, "rewards/rejected": -1.7446553707122803, "step": 5170 }, { "epoch": 0.6, "learning_rate": 1.2301299309376098e-07, "logits/chosen": -2.9150075912475586, "logits/rejected": -3.131957769393921, "logps/chosen": -355.2781982421875, "logps/rejected": -271.6290588378906, "loss": 0.4646, "rewards/accuracies": 0.875, "rewards/chosen": 0.1303451508283615, "rewards/margins": 1.7111685276031494, "rewards/rejected": -1.580823540687561, "step": 5171 }, { "epoch": 0.6, "learning_rate": 1.229778766241367e-07, "logits/chosen": -3.3008627891540527, "logits/rejected": -3.500185012817383, "logps/chosen": -227.70870971679688, "logps/rejected": -286.99676513671875, "loss": 0.9137, "rewards/accuracies": 0.75, "rewards/chosen": -0.4265899658203125, "rewards/margins": 1.3762304782867432, "rewards/rejected": -1.8028204441070557, "step": 5172 }, { "epoch": 0.6, "learning_rate": 1.2294276015451246e-07, "logits/chosen": -3.062892436981201, "logits/rejected": -3.0610804557800293, "logps/chosen": -337.1151123046875, "logps/rejected": -220.88787841796875, "loss": 0.2163, "rewards/accuracies": 1.0, "rewards/chosen": 0.4035223424434662, "rewards/margins": 2.117227554321289, "rewards/rejected": -1.71370530128479, "step": 5173 }, { "epoch": 0.6, "learning_rate": 1.229076436848882e-07, "logits/chosen": -3.16302227973938, "logits/rejected": -2.93937611579895, "logps/chosen": -431.26702880859375, "logps/rejected": -457.2254638671875, "loss": 0.3975, "rewards/accuracies": 0.625, "rewards/chosen": -0.04858148843050003, "rewards/margins": 1.5382827520370483, "rewards/rejected": -1.5868642330169678, "step": 5174 }, { "epoch": 0.6, "learning_rate": 1.2287252721526397e-07, "logits/chosen": -2.721254348754883, "logits/rejected": -2.4619572162628174, "logps/chosen": -149.45738220214844, "logps/rejected": -97.16966247558594, "loss": 0.6138, "rewards/accuracies": 0.625, "rewards/chosen": -0.4412391185760498, "rewards/margins": 0.8806108236312866, "rewards/rejected": -1.3218498229980469, "step": 5175 }, { "epoch": 0.6, "learning_rate": 1.228374107456397e-07, "logits/chosen": -3.1360621452331543, "logits/rejected": -3.457526445388794, "logps/chosen": -300.4748229980469, "logps/rejected": -274.537109375, "loss": 0.7781, "rewards/accuracies": 0.625, "rewards/chosen": -0.5320114493370056, "rewards/margins": 0.6593219041824341, "rewards/rejected": -1.191333293914795, "step": 5176 }, { "epoch": 0.6, "learning_rate": 1.2280229427601545e-07, "logits/chosen": -2.9052743911743164, "logits/rejected": -2.901937246322632, "logps/chosen": -262.151611328125, "logps/rejected": -213.69956970214844, "loss": 0.2505, "rewards/accuracies": 0.875, "rewards/chosen": 0.21114599704742432, "rewards/margins": 1.6099231243133545, "rewards/rejected": -1.3987771272659302, "step": 5177 }, { "epoch": 0.6, "learning_rate": 1.2276717780639118e-07, "logits/chosen": -3.143329381942749, "logits/rejected": -3.0886950492858887, "logps/chosen": -165.79611206054688, "logps/rejected": -289.0396423339844, "loss": 0.3345, "rewards/accuracies": 0.875, "rewards/chosen": -0.4432068467140198, "rewards/margins": 2.25163197517395, "rewards/rejected": -2.694838762283325, "step": 5178 }, { "epoch": 0.6, "learning_rate": 1.2273206133676693e-07, "logits/chosen": -3.8247275352478027, "logits/rejected": -4.282573223114014, "logps/chosen": -102.87472534179688, "logps/rejected": -190.88671875, "loss": 0.3997, "rewards/accuracies": 0.875, "rewards/chosen": -0.7284829616546631, "rewards/margins": 2.4708025455474854, "rewards/rejected": -3.1992855072021484, "step": 5179 }, { "epoch": 0.6, "learning_rate": 1.2269694486714269e-07, "logits/chosen": -2.9088070392608643, "logits/rejected": -3.211364269256592, "logps/chosen": -268.937744140625, "logps/rejected": -358.1583557128906, "loss": 0.351, "rewards/accuracies": 0.875, "rewards/chosen": -0.44695067405700684, "rewards/margins": 2.32647705078125, "rewards/rejected": -2.773427724838257, "step": 5180 }, { "epoch": 0.6, "learning_rate": 1.2266182839751844e-07, "logits/chosen": -2.664862871170044, "logits/rejected": -3.0151631832122803, "logps/chosen": -314.10125732421875, "logps/rejected": -191.87794494628906, "loss": 0.8745, "rewards/accuracies": 0.625, "rewards/chosen": -0.7956207394599915, "rewards/margins": 0.1315751075744629, "rewards/rejected": -0.9271959662437439, "step": 5181 }, { "epoch": 0.6, "learning_rate": 1.2262671192789417e-07, "logits/chosen": -4.011450290679932, "logits/rejected": -3.760204315185547, "logps/chosen": -379.6104736328125, "logps/rejected": -390.9476013183594, "loss": 0.5916, "rewards/accuracies": 0.625, "rewards/chosen": 0.1002708375453949, "rewards/margins": 1.4575798511505127, "rewards/rejected": -1.3573089838027954, "step": 5182 }, { "epoch": 0.6, "learning_rate": 1.2259159545826992e-07, "logits/chosen": -2.348848819732666, "logits/rejected": -2.8163280487060547, "logps/chosen": -446.20574951171875, "logps/rejected": -251.6975555419922, "loss": 0.4218, "rewards/accuracies": 0.625, "rewards/chosen": 0.2143925279378891, "rewards/margins": 2.2070815563201904, "rewards/rejected": -1.9926890134811401, "step": 5183 }, { "epoch": 0.6, "learning_rate": 1.2255647898864568e-07, "logits/chosen": -3.4913954734802246, "logits/rejected": -3.388582944869995, "logps/chosen": -285.34344482421875, "logps/rejected": -275.7471923828125, "loss": 0.7703, "rewards/accuracies": 0.625, "rewards/chosen": -0.3055725693702698, "rewards/margins": 1.5932461023330688, "rewards/rejected": -1.8988187313079834, "step": 5184 }, { "epoch": 0.6, "learning_rate": 1.225213625190214e-07, "logits/chosen": -2.2849247455596924, "logits/rejected": -2.199320077896118, "logps/chosen": -336.43896484375, "logps/rejected": -326.4547424316406, "loss": 0.5351, "rewards/accuracies": 0.625, "rewards/chosen": 0.32438692450523376, "rewards/margins": 1.9414520263671875, "rewards/rejected": -1.617065191268921, "step": 5185 }, { "epoch": 0.6, "learning_rate": 1.2248624604939716e-07, "logits/chosen": -2.9985289573669434, "logits/rejected": -2.9439406394958496, "logps/chosen": -253.88369750976562, "logps/rejected": -272.1767578125, "loss": 0.1663, "rewards/accuracies": 0.875, "rewards/chosen": -0.05855366587638855, "rewards/margins": 3.2972700595855713, "rewards/rejected": -3.3558237552642822, "step": 5186 }, { "epoch": 0.6, "learning_rate": 1.224511295797729e-07, "logits/chosen": -3.2378978729248047, "logits/rejected": -2.902519941329956, "logps/chosen": -267.2054443359375, "logps/rejected": -169.57061767578125, "loss": 0.462, "rewards/accuracies": 0.75, "rewards/chosen": -0.10090598464012146, "rewards/margins": 1.0095982551574707, "rewards/rejected": -1.110504150390625, "step": 5187 }, { "epoch": 0.6, "learning_rate": 1.2241601311014866e-07, "logits/chosen": -2.7439441680908203, "logits/rejected": -2.5505380630493164, "logps/chosen": -432.6770324707031, "logps/rejected": -374.92626953125, "loss": 0.8003, "rewards/accuracies": 0.75, "rewards/chosen": -0.14458434283733368, "rewards/margins": 0.4205944240093231, "rewards/rejected": -0.5651787519454956, "step": 5188 }, { "epoch": 0.6, "learning_rate": 1.223808966405244e-07, "logits/chosen": -2.9126179218292236, "logits/rejected": -2.9583144187927246, "logps/chosen": -219.92019653320312, "logps/rejected": -376.5243225097656, "loss": 0.2212, "rewards/accuracies": 0.875, "rewards/chosen": -0.05798976868391037, "rewards/margins": 2.5000534057617188, "rewards/rejected": -2.5580430030822754, "step": 5189 }, { "epoch": 0.6, "learning_rate": 1.2234578017090015e-07, "logits/chosen": -2.811135768890381, "logits/rejected": -2.784280776977539, "logps/chosen": -255.41860961914062, "logps/rejected": -279.0583801269531, "loss": 0.6034, "rewards/accuracies": 0.875, "rewards/chosen": 0.25849589705467224, "rewards/margins": 0.8028140664100647, "rewards/rejected": -0.5443181395530701, "step": 5190 }, { "epoch": 0.6, "learning_rate": 1.2231066370127587e-07, "logits/chosen": -2.988229513168335, "logits/rejected": -2.9353456497192383, "logps/chosen": -292.20159912109375, "logps/rejected": -398.951416015625, "loss": 0.6775, "rewards/accuracies": 0.625, "rewards/chosen": -0.42451125383377075, "rewards/margins": 0.22684772312641144, "rewards/rejected": -0.651358962059021, "step": 5191 }, { "epoch": 0.6, "learning_rate": 1.2227554723165165e-07, "logits/chosen": -3.631828784942627, "logits/rejected": -3.5053586959838867, "logps/chosen": -231.27569580078125, "logps/rejected": -250.5157012939453, "loss": 0.4285, "rewards/accuracies": 0.875, "rewards/chosen": 0.17528848350048065, "rewards/margins": 1.556211233139038, "rewards/rejected": -1.380922794342041, "step": 5192 }, { "epoch": 0.6, "learning_rate": 1.2224043076202738e-07, "logits/chosen": -3.0040669441223145, "logits/rejected": -2.959940195083618, "logps/chosen": -281.1169738769531, "logps/rejected": -302.4212951660156, "loss": 0.1298, "rewards/accuracies": 0.875, "rewards/chosen": 0.765924334526062, "rewards/margins": 4.009528160095215, "rewards/rejected": -3.2436037063598633, "step": 5193 }, { "epoch": 0.6, "learning_rate": 1.2220531429240313e-07, "logits/chosen": -3.0549678802490234, "logits/rejected": -2.9327633380889893, "logps/chosen": -170.459228515625, "logps/rejected": -202.31314086914062, "loss": 0.554, "rewards/accuracies": 0.625, "rewards/chosen": 0.3828567862510681, "rewards/margins": 1.072320580482483, "rewards/rejected": -0.6894637942314148, "step": 5194 }, { "epoch": 0.6, "learning_rate": 1.2217019782277886e-07, "logits/chosen": -3.3431754112243652, "logits/rejected": -3.401245594024658, "logps/chosen": -203.76792907714844, "logps/rejected": -276.31671142578125, "loss": 0.3384, "rewards/accuracies": 0.875, "rewards/chosen": 0.16383762657642365, "rewards/margins": 1.644264817237854, "rewards/rejected": -1.4804272651672363, "step": 5195 }, { "epoch": 0.6, "learning_rate": 1.2213508135315462e-07, "logits/chosen": -3.3390755653381348, "logits/rejected": -3.496105194091797, "logps/chosen": -198.2833251953125, "logps/rejected": -214.4532012939453, "loss": 0.2186, "rewards/accuracies": 1.0, "rewards/chosen": 0.2153109908103943, "rewards/margins": 1.8719356060028076, "rewards/rejected": -1.6566245555877686, "step": 5196 }, { "epoch": 0.6, "learning_rate": 1.2209996488353037e-07, "logits/chosen": -2.543335437774658, "logits/rejected": -2.8048582077026367, "logps/chosen": -474.1484069824219, "logps/rejected": -217.5401611328125, "loss": 1.3623, "rewards/accuracies": 0.375, "rewards/chosen": -1.6533265113830566, "rewards/margins": -0.558960497379303, "rewards/rejected": -1.0943658351898193, "step": 5197 }, { "epoch": 0.6, "learning_rate": 1.2206484841390612e-07, "logits/chosen": -3.136906862258911, "logits/rejected": -3.1081228256225586, "logps/chosen": -268.7641906738281, "logps/rejected": -438.28350830078125, "loss": 0.5438, "rewards/accuracies": 0.625, "rewards/chosen": -0.45171698927879333, "rewards/margins": 1.0462948083877563, "rewards/rejected": -1.498011827468872, "step": 5198 }, { "epoch": 0.6, "learning_rate": 1.2202973194428185e-07, "logits/chosen": -3.0721583366394043, "logits/rejected": -2.9507555961608887, "logps/chosen": -182.5630340576172, "logps/rejected": -335.062255859375, "loss": 0.3415, "rewards/accuracies": 0.75, "rewards/chosen": 0.004327267408370972, "rewards/margins": 3.069530487060547, "rewards/rejected": -3.0652031898498535, "step": 5199 }, { "epoch": 0.6, "learning_rate": 1.219946154746576e-07, "logits/chosen": -3.632455348968506, "logits/rejected": -3.5064611434936523, "logps/chosen": -273.2044372558594, "logps/rejected": -248.01242065429688, "loss": 0.3583, "rewards/accuracies": 0.75, "rewards/chosen": -0.23595888912677765, "rewards/margins": 2.087024688720703, "rewards/rejected": -2.322983741760254, "step": 5200 }, { "epoch": 0.6, "learning_rate": 1.2195949900503336e-07, "logits/chosen": -2.843179225921631, "logits/rejected": -2.407686471939087, "logps/chosen": -297.7922668457031, "logps/rejected": -349.45745849609375, "loss": 0.5948, "rewards/accuracies": 0.75, "rewards/chosen": -0.15714681148529053, "rewards/margins": 0.7533746957778931, "rewards/rejected": -0.9105215072631836, "step": 5201 }, { "epoch": 0.6, "learning_rate": 1.2192438253540909e-07, "logits/chosen": -3.062192916870117, "logits/rejected": -2.9286723136901855, "logps/chosen": -416.03021240234375, "logps/rejected": -348.7435302734375, "loss": 0.2691, "rewards/accuracies": 0.875, "rewards/chosen": 0.33860456943511963, "rewards/margins": 2.4508655071258545, "rewards/rejected": -2.1122610569000244, "step": 5202 }, { "epoch": 0.6, "learning_rate": 1.2188926606578484e-07, "logits/chosen": -2.5191261768341064, "logits/rejected": -2.333369731903076, "logps/chosen": -235.30941772460938, "logps/rejected": -322.86285400390625, "loss": 0.4108, "rewards/accuracies": 0.75, "rewards/chosen": 0.2562994062900543, "rewards/margins": 1.2130556106567383, "rewards/rejected": -0.9567563533782959, "step": 5203 }, { "epoch": 0.6, "learning_rate": 1.218541495961606e-07, "logits/chosen": -1.9033299684524536, "logits/rejected": -1.866495132446289, "logps/chosen": -191.02035522460938, "logps/rejected": -246.0564727783203, "loss": 0.3568, "rewards/accuracies": 0.75, "rewards/chosen": 0.11245231330394745, "rewards/margins": 1.682666540145874, "rewards/rejected": -1.5702142715454102, "step": 5204 }, { "epoch": 0.6, "learning_rate": 1.2181903312653635e-07, "logits/chosen": -3.030803680419922, "logits/rejected": -3.0483882427215576, "logps/chosen": -363.9146728515625, "logps/rejected": -333.9434509277344, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156580328941345, "rewards/margins": 2.3087282180786133, "rewards/rejected": -1.993070363998413, "step": 5205 }, { "epoch": 0.6, "learning_rate": 1.2178391665691207e-07, "logits/chosen": -2.2458279132843018, "logits/rejected": -2.2214837074279785, "logps/chosen": -403.5029602050781, "logps/rejected": -270.57659912109375, "loss": 0.4705, "rewards/accuracies": 0.75, "rewards/chosen": 0.059605419635772705, "rewards/margins": 1.6680136919021606, "rewards/rejected": -1.6084082126617432, "step": 5206 }, { "epoch": 0.6, "learning_rate": 1.2174880018728783e-07, "logits/chosen": -2.8312740325927734, "logits/rejected": -2.9783174991607666, "logps/chosen": -341.7548828125, "logps/rejected": -371.0419921875, "loss": 0.2661, "rewards/accuracies": 0.875, "rewards/chosen": 0.10385347902774811, "rewards/margins": 3.9140877723693848, "rewards/rejected": -3.8102340698242188, "step": 5207 }, { "epoch": 0.6, "learning_rate": 1.2171368371766358e-07, "logits/chosen": -3.7071492671966553, "logits/rejected": -3.680072069168091, "logps/chosen": -239.55862426757812, "logps/rejected": -337.0492858886719, "loss": 0.5341, "rewards/accuracies": 0.875, "rewards/chosen": -0.6820453405380249, "rewards/margins": 2.6396238803863525, "rewards/rejected": -3.321669340133667, "step": 5208 }, { "epoch": 0.6, "learning_rate": 1.2167856724803934e-07, "logits/chosen": -3.2911148071289062, "logits/rejected": -3.235689640045166, "logps/chosen": -207.2025146484375, "logps/rejected": -197.9849853515625, "loss": 0.3571, "rewards/accuracies": 0.75, "rewards/chosen": -0.2274639904499054, "rewards/margins": 2.2422285079956055, "rewards/rejected": -2.4696924686431885, "step": 5209 }, { "epoch": 0.6, "learning_rate": 1.2164345077841506e-07, "logits/chosen": -3.667083263397217, "logits/rejected": -3.7753007411956787, "logps/chosen": -202.90139770507812, "logps/rejected": -233.2733154296875, "loss": 0.5496, "rewards/accuracies": 0.625, "rewards/chosen": -0.2940638065338135, "rewards/margins": 1.0239603519439697, "rewards/rejected": -1.3180241584777832, "step": 5210 }, { "epoch": 0.6, "learning_rate": 1.2160833430879082e-07, "logits/chosen": -3.4138057231903076, "logits/rejected": -3.7077465057373047, "logps/chosen": -123.68051147460938, "logps/rejected": -201.48062133789062, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 1.0331110954284668, "rewards/margins": 2.8269762992858887, "rewards/rejected": -1.7938652038574219, "step": 5211 }, { "epoch": 0.6, "learning_rate": 1.2157321783916657e-07, "logits/chosen": -3.430452585220337, "logits/rejected": -3.331547975540161, "logps/chosen": -277.80694580078125, "logps/rejected": -206.60984802246094, "loss": 0.5471, "rewards/accuracies": 0.75, "rewards/chosen": -0.01072278618812561, "rewards/margins": 1.1077311038970947, "rewards/rejected": -1.118453860282898, "step": 5212 }, { "epoch": 0.6, "learning_rate": 1.215381013695423e-07, "logits/chosen": -3.344498872756958, "logits/rejected": -3.419649600982666, "logps/chosen": -267.4978942871094, "logps/rejected": -247.4333953857422, "loss": 0.5408, "rewards/accuracies": 0.75, "rewards/chosen": 0.39230096340179443, "rewards/margins": 1.9568054676055908, "rewards/rejected": -1.564504623413086, "step": 5213 }, { "epoch": 0.6, "learning_rate": 1.2150298489991805e-07, "logits/chosen": -2.718519926071167, "logits/rejected": -3.027796745300293, "logps/chosen": -199.85960388183594, "logps/rejected": -308.43450927734375, "loss": 0.4801, "rewards/accuracies": 0.75, "rewards/chosen": -0.5378886461257935, "rewards/margins": 1.626355767250061, "rewards/rejected": -2.1642444133758545, "step": 5214 }, { "epoch": 0.6, "learning_rate": 1.214678684302938e-07, "logits/chosen": -2.26768159866333, "logits/rejected": -2.3641233444213867, "logps/chosen": -269.4970397949219, "logps/rejected": -178.1766357421875, "loss": 0.8523, "rewards/accuracies": 0.625, "rewards/chosen": -0.658146858215332, "rewards/margins": 1.2302989959716797, "rewards/rejected": -1.8884457349777222, "step": 5215 }, { "epoch": 0.6, "learning_rate": 1.2143275196066956e-07, "logits/chosen": -3.828339099884033, "logits/rejected": -3.438283920288086, "logps/chosen": -312.7521667480469, "logps/rejected": -286.481201171875, "loss": 0.4256, "rewards/accuracies": 0.875, "rewards/chosen": -0.08610451221466064, "rewards/margins": 2.0246944427490234, "rewards/rejected": -2.1107988357543945, "step": 5216 }, { "epoch": 0.6, "learning_rate": 1.213976354910453e-07, "logits/chosen": -3.1766724586486816, "logits/rejected": -3.0177972316741943, "logps/chosen": -516.7952880859375, "logps/rejected": -344.722900390625, "loss": 0.2547, "rewards/accuracies": 1.0, "rewards/chosen": -0.31025010347366333, "rewards/margins": 2.1715352535247803, "rewards/rejected": -2.481785297393799, "step": 5217 }, { "epoch": 0.6, "learning_rate": 1.2136251902142104e-07, "logits/chosen": -3.464975357055664, "logits/rejected": -3.0747368335723877, "logps/chosen": -229.38645935058594, "logps/rejected": -283.61602783203125, "loss": 0.4442, "rewards/accuracies": 0.75, "rewards/chosen": 0.28603190183639526, "rewards/margins": 1.935234546661377, "rewards/rejected": -1.649202585220337, "step": 5218 }, { "epoch": 0.6, "learning_rate": 1.2132740255179677e-07, "logits/chosen": -3.1373889446258545, "logits/rejected": -2.7044944763183594, "logps/chosen": -286.94097900390625, "logps/rejected": -219.58468627929688, "loss": 0.4144, "rewards/accuracies": 0.75, "rewards/chosen": -0.576767086982727, "rewards/margins": 1.6572036743164062, "rewards/rejected": -2.2339706420898438, "step": 5219 }, { "epoch": 0.6, "learning_rate": 1.2129228608217255e-07, "logits/chosen": -2.6105387210845947, "logits/rejected": -2.6753692626953125, "logps/chosen": -369.8568115234375, "logps/rejected": -217.5819549560547, "loss": 0.5244, "rewards/accuracies": 0.75, "rewards/chosen": -0.3486417531967163, "rewards/margins": 0.8533391356468201, "rewards/rejected": -1.2019808292388916, "step": 5220 }, { "epoch": 0.6, "learning_rate": 1.2125716961254828e-07, "logits/chosen": -2.804413080215454, "logits/rejected": -2.661242723464966, "logps/chosen": -366.040283203125, "logps/rejected": -367.112548828125, "loss": 0.2683, "rewards/accuracies": 0.875, "rewards/chosen": 0.247966006398201, "rewards/margins": 2.6256794929504395, "rewards/rejected": -2.377713441848755, "step": 5221 }, { "epoch": 0.6, "learning_rate": 1.2122205314292403e-07, "logits/chosen": -2.782045841217041, "logits/rejected": -3.1047775745391846, "logps/chosen": -159.67384338378906, "logps/rejected": -303.17169189453125, "loss": 0.5111, "rewards/accuracies": 0.75, "rewards/chosen": 0.019940122961997986, "rewards/margins": 1.6275315284729004, "rewards/rejected": -1.6075913906097412, "step": 5222 }, { "epoch": 0.6, "learning_rate": 1.2118693667329976e-07, "logits/chosen": -2.593538761138916, "logits/rejected": -2.3662805557250977, "logps/chosen": -228.2781219482422, "logps/rejected": -239.7020721435547, "loss": 0.2843, "rewards/accuracies": 1.0, "rewards/chosen": 0.1524525135755539, "rewards/margins": 1.3987566232681274, "rewards/rejected": -1.2463042736053467, "step": 5223 }, { "epoch": 0.6, "learning_rate": 1.2115182020367554e-07, "logits/chosen": -3.44091534614563, "logits/rejected": -3.477853775024414, "logps/chosen": -115.70633697509766, "logps/rejected": -182.95123291015625, "loss": 0.4854, "rewards/accuracies": 0.75, "rewards/chosen": -0.02864648401737213, "rewards/margins": 1.3936532735824585, "rewards/rejected": -1.4222997426986694, "step": 5224 }, { "epoch": 0.6, "learning_rate": 1.2111670373405127e-07, "logits/chosen": -2.741844892501831, "logits/rejected": -2.879951000213623, "logps/chosen": -240.1184539794922, "logps/rejected": -273.3140563964844, "loss": 0.3415, "rewards/accuracies": 1.0, "rewards/chosen": -0.4107971787452698, "rewards/margins": 1.5568939447402954, "rewards/rejected": -1.9676910638809204, "step": 5225 }, { "epoch": 0.6, "learning_rate": 1.2108158726442702e-07, "logits/chosen": -3.196927070617676, "logits/rejected": -3.275141716003418, "logps/chosen": -195.29122924804688, "logps/rejected": -234.3670654296875, "loss": 0.5083, "rewards/accuracies": 0.75, "rewards/chosen": -0.07945594191551208, "rewards/margins": 1.788076400756836, "rewards/rejected": -1.8675322532653809, "step": 5226 }, { "epoch": 0.6, "learning_rate": 1.2104647079480275e-07, "logits/chosen": -2.665973663330078, "logits/rejected": -2.7045369148254395, "logps/chosen": -232.06036376953125, "logps/rejected": -186.01589965820312, "loss": 0.4748, "rewards/accuracies": 0.875, "rewards/chosen": 0.12026205658912659, "rewards/margins": 0.8856464624404907, "rewards/rejected": -0.7653844356536865, "step": 5227 }, { "epoch": 0.6, "learning_rate": 1.210113543251785e-07, "logits/chosen": -2.227813482284546, "logits/rejected": -2.634087562561035, "logps/chosen": -332.4530334472656, "logps/rejected": -444.0452575683594, "loss": 0.3683, "rewards/accuracies": 0.75, "rewards/chosen": -0.15424798429012299, "rewards/margins": 1.6662102937698364, "rewards/rejected": -1.8204580545425415, "step": 5228 }, { "epoch": 0.6, "learning_rate": 1.2097623785555425e-07, "logits/chosen": -3.177196502685547, "logits/rejected": -2.92907977104187, "logps/chosen": -162.54293823242188, "logps/rejected": -154.77243041992188, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -0.17443682253360748, "rewards/margins": 1.363032341003418, "rewards/rejected": -1.5374690294265747, "step": 5229 }, { "epoch": 0.6, "learning_rate": 1.2094112138592998e-07, "logits/chosen": -3.122189521789551, "logits/rejected": -3.016157865524292, "logps/chosen": -227.82876586914062, "logps/rejected": -248.71435546875, "loss": 0.3712, "rewards/accuracies": 1.0, "rewards/chosen": -0.088976189494133, "rewards/margins": 0.8735700845718384, "rewards/rejected": -0.9625463485717773, "step": 5230 }, { "epoch": 0.6, "learning_rate": 1.2090600491630574e-07, "logits/chosen": -3.0521514415740967, "logits/rejected": -2.9738926887512207, "logps/chosen": -329.7683410644531, "logps/rejected": -276.89349365234375, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": -0.20828092098236084, "rewards/margins": 1.4070470333099365, "rewards/rejected": -1.6153278350830078, "step": 5231 }, { "epoch": 0.6, "learning_rate": 1.208708884466815e-07, "logits/chosen": -3.993516445159912, "logits/rejected": -3.5869407653808594, "logps/chosen": -324.1094665527344, "logps/rejected": -201.15512084960938, "loss": 0.9559, "rewards/accuracies": 0.625, "rewards/chosen": -0.8842984437942505, "rewards/margins": 0.6926509737968445, "rewards/rejected": -1.5769493579864502, "step": 5232 }, { "epoch": 0.6, "learning_rate": 1.2083577197705724e-07, "logits/chosen": -2.706437110900879, "logits/rejected": -2.5001683235168457, "logps/chosen": -247.0949249267578, "logps/rejected": -440.0496520996094, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": 0.25635719299316406, "rewards/margins": 2.8700578212738037, "rewards/rejected": -2.6137006282806396, "step": 5233 }, { "epoch": 0.6, "learning_rate": 1.2080065550743297e-07, "logits/chosen": -3.5473976135253906, "logits/rejected": -3.609318733215332, "logps/chosen": -204.4401397705078, "logps/rejected": -232.96856689453125, "loss": 0.2044, "rewards/accuracies": 0.875, "rewards/chosen": 0.03684224188327789, "rewards/margins": 2.7051258087158203, "rewards/rejected": -2.668283700942993, "step": 5234 }, { "epoch": 0.6, "learning_rate": 1.2076553903780872e-07, "logits/chosen": -3.4690723419189453, "logits/rejected": -4.082224369049072, "logps/chosen": -172.59579467773438, "logps/rejected": -335.96820068359375, "loss": 0.1719, "rewards/accuracies": 1.0, "rewards/chosen": 0.791286051273346, "rewards/margins": 3.591503858566284, "rewards/rejected": -2.800217628479004, "step": 5235 }, { "epoch": 0.6, "learning_rate": 1.2073042256818448e-07, "logits/chosen": -2.8833184242248535, "logits/rejected": -2.8643088340759277, "logps/chosen": -324.9278564453125, "logps/rejected": -269.1184387207031, "loss": 0.2847, "rewards/accuracies": 1.0, "rewards/chosen": -0.4237309396266937, "rewards/margins": 1.2329367399215698, "rewards/rejected": -1.656667709350586, "step": 5236 }, { "epoch": 0.6, "learning_rate": 1.2069530609856023e-07, "logits/chosen": -3.2410712242126465, "logits/rejected": -3.141294240951538, "logps/chosen": -318.8669128417969, "logps/rejected": -263.40020751953125, "loss": 0.2148, "rewards/accuracies": 0.875, "rewards/chosen": -0.5544151663780212, "rewards/margins": 2.312713384628296, "rewards/rejected": -2.867128610610962, "step": 5237 }, { "epoch": 0.6, "learning_rate": 1.2066018962893596e-07, "logits/chosen": -4.029869079589844, "logits/rejected": -3.990821361541748, "logps/chosen": -429.2679443359375, "logps/rejected": -218.78884887695312, "loss": 0.5345, "rewards/accuracies": 0.75, "rewards/chosen": -0.2725607752799988, "rewards/margins": 1.8695515394210815, "rewards/rejected": -2.1421122550964355, "step": 5238 }, { "epoch": 0.6, "learning_rate": 1.2062507315931171e-07, "logits/chosen": -2.9293506145477295, "logits/rejected": -3.1132500171661377, "logps/chosen": -274.43902587890625, "logps/rejected": -281.4142150878906, "loss": 0.4084, "rewards/accuracies": 0.875, "rewards/chosen": -0.4382481575012207, "rewards/margins": 1.106748104095459, "rewards/rejected": -1.5449962615966797, "step": 5239 }, { "epoch": 0.6, "learning_rate": 1.2058995668968744e-07, "logits/chosen": -2.7817864418029785, "logits/rejected": -2.712226390838623, "logps/chosen": -363.6641845703125, "logps/rejected": -339.00439453125, "loss": 0.7376, "rewards/accuracies": 0.625, "rewards/chosen": -0.07091806828975677, "rewards/margins": 1.5792995691299438, "rewards/rejected": -1.6502177715301514, "step": 5240 }, { "epoch": 0.6, "learning_rate": 1.2055484022006322e-07, "logits/chosen": -2.2478151321411133, "logits/rejected": -2.2379281520843506, "logps/chosen": -345.2948303222656, "logps/rejected": -207.29713439941406, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": 0.3363218605518341, "rewards/margins": 2.583042621612549, "rewards/rejected": -2.246720790863037, "step": 5241 }, { "epoch": 0.6, "learning_rate": 1.2051972375043895e-07, "logits/chosen": -2.9631898403167725, "logits/rejected": -3.1933281421661377, "logps/chosen": -326.3813171386719, "logps/rejected": -303.8610534667969, "loss": 0.588, "rewards/accuracies": 0.875, "rewards/chosen": -0.01886656880378723, "rewards/margins": 2.3021111488342285, "rewards/rejected": -2.3209774494171143, "step": 5242 }, { "epoch": 0.6, "learning_rate": 1.204846072808147e-07, "logits/chosen": -3.171943187713623, "logits/rejected": -2.9726765155792236, "logps/chosen": -303.70343017578125, "logps/rejected": -288.77178955078125, "loss": 0.4872, "rewards/accuracies": 0.625, "rewards/chosen": 0.060552701354026794, "rewards/margins": 2.048556089401245, "rewards/rejected": -1.9880033731460571, "step": 5243 }, { "epoch": 0.6, "learning_rate": 1.2044949081119043e-07, "logits/chosen": -2.5231645107269287, "logits/rejected": -2.7960026264190674, "logps/chosen": -239.24803161621094, "logps/rejected": -311.535400390625, "loss": 0.2408, "rewards/accuracies": 0.875, "rewards/chosen": 0.19614994525909424, "rewards/margins": 2.4837446212768555, "rewards/rejected": -2.287594795227051, "step": 5244 }, { "epoch": 0.6, "learning_rate": 1.2041437434156618e-07, "logits/chosen": -2.4150071144104004, "logits/rejected": -2.3548648357391357, "logps/chosen": -402.8688659667969, "logps/rejected": -357.9402160644531, "loss": 0.5995, "rewards/accuracies": 0.5, "rewards/chosen": -0.6132261157035828, "rewards/margins": 1.1073737144470215, "rewards/rejected": -1.720599889755249, "step": 5245 }, { "epoch": 0.6, "learning_rate": 1.2037925787194194e-07, "logits/chosen": -3.1932194232940674, "logits/rejected": -3.2763094902038574, "logps/chosen": -267.24530029296875, "logps/rejected": -412.2287902832031, "loss": 0.4563, "rewards/accuracies": 0.75, "rewards/chosen": 0.2812500596046448, "rewards/margins": 1.5769884586334229, "rewards/rejected": -1.2957382202148438, "step": 5246 }, { "epoch": 0.6, "learning_rate": 1.2034414140231767e-07, "logits/chosen": -2.9148612022399902, "logits/rejected": -2.9143834114074707, "logps/chosen": -302.7430114746094, "logps/rejected": -226.05499267578125, "loss": 0.4157, "rewards/accuracies": 0.875, "rewards/chosen": -0.2111915498971939, "rewards/margins": 1.7067968845367432, "rewards/rejected": -1.9179884195327759, "step": 5247 }, { "epoch": 0.6, "learning_rate": 1.2030902493269342e-07, "logits/chosen": -3.700883150100708, "logits/rejected": -3.5394699573516846, "logps/chosen": -252.55007934570312, "logps/rejected": -296.26751708984375, "loss": 0.3648, "rewards/accuracies": 0.75, "rewards/chosen": -0.16407988965511322, "rewards/margins": 1.4042044878005981, "rewards/rejected": -1.568284273147583, "step": 5248 }, { "epoch": 0.61, "learning_rate": 1.2027390846306917e-07, "logits/chosen": -3.3435847759246826, "logits/rejected": -3.6531989574432373, "logps/chosen": -281.9114685058594, "logps/rejected": -149.0885467529297, "loss": 0.4394, "rewards/accuracies": 0.75, "rewards/chosen": -0.3497462272644043, "rewards/margins": 0.9830405712127686, "rewards/rejected": -1.3327867984771729, "step": 5249 }, { "epoch": 0.61, "learning_rate": 1.2023879199344493e-07, "logits/chosen": -3.815300941467285, "logits/rejected": -3.3272323608398438, "logps/chosen": -317.18035888671875, "logps/rejected": -256.69805908203125, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": -0.04160308837890625, "rewards/margins": 2.046069383621216, "rewards/rejected": -2.087672472000122, "step": 5250 }, { "epoch": 0.61, "learning_rate": 1.2020367552382065e-07, "logits/chosen": -2.4591777324676514, "logits/rejected": -2.8692550659179688, "logps/chosen": -167.66961669921875, "logps/rejected": -271.9220886230469, "loss": 0.2176, "rewards/accuracies": 0.875, "rewards/chosen": 0.09552323818206787, "rewards/margins": 3.819779396057129, "rewards/rejected": -3.7242558002471924, "step": 5251 }, { "epoch": 0.61, "learning_rate": 1.201685590541964e-07, "logits/chosen": -3.1461150646209717, "logits/rejected": -3.147984743118286, "logps/chosen": -196.43641662597656, "logps/rejected": -325.5423583984375, "loss": 0.1833, "rewards/accuracies": 1.0, "rewards/chosen": 0.18382255733013153, "rewards/margins": 2.732227325439453, "rewards/rejected": -2.5484049320220947, "step": 5252 }, { "epoch": 0.61, "learning_rate": 1.2013344258457216e-07, "logits/chosen": -2.6345555782318115, "logits/rejected": -2.861504316329956, "logps/chosen": -325.9178466796875, "logps/rejected": -274.4767150878906, "loss": 0.4591, "rewards/accuracies": 0.625, "rewards/chosen": -0.518799901008606, "rewards/margins": 1.3637924194335938, "rewards/rejected": -1.8825924396514893, "step": 5253 }, { "epoch": 0.61, "learning_rate": 1.2009832611494792e-07, "logits/chosen": -2.8335118293762207, "logits/rejected": -2.546264171600342, "logps/chosen": -333.8028564453125, "logps/rejected": -265.1710205078125, "loss": 0.8559, "rewards/accuracies": 0.625, "rewards/chosen": -0.21729569137096405, "rewards/margins": 0.4414712190628052, "rewards/rejected": -0.6587669253349304, "step": 5254 }, { "epoch": 0.61, "learning_rate": 1.2006320964532364e-07, "logits/chosen": -2.810483455657959, "logits/rejected": -2.869291305541992, "logps/chosen": -271.9112548828125, "logps/rejected": -205.33668518066406, "loss": 0.4359, "rewards/accuracies": 0.875, "rewards/chosen": 0.04997949302196503, "rewards/margins": 1.3843660354614258, "rewards/rejected": -1.3343865871429443, "step": 5255 }, { "epoch": 0.61, "learning_rate": 1.200280931756994e-07, "logits/chosen": -3.0189099311828613, "logits/rejected": -3.150319814682007, "logps/chosen": -314.3910217285156, "logps/rejected": -274.66558837890625, "loss": 0.4225, "rewards/accuracies": 0.875, "rewards/chosen": 0.08220675587654114, "rewards/margins": 1.5590273141860962, "rewards/rejected": -1.4768205881118774, "step": 5256 }, { "epoch": 0.61, "learning_rate": 1.1999297670607515e-07, "logits/chosen": -3.6237306594848633, "logits/rejected": -3.667257785797119, "logps/chosen": -138.72219848632812, "logps/rejected": -225.99139404296875, "loss": 0.3974, "rewards/accuracies": 0.75, "rewards/chosen": -0.3245377540588379, "rewards/margins": 1.8181201219558716, "rewards/rejected": -2.142657995223999, "step": 5257 }, { "epoch": 0.61, "learning_rate": 1.199578602364509e-07, "logits/chosen": -2.6627988815307617, "logits/rejected": -3.0449540615081787, "logps/chosen": -229.34292602539062, "logps/rejected": -360.1202392578125, "loss": 0.2943, "rewards/accuracies": 0.75, "rewards/chosen": -0.08262103796005249, "rewards/margins": 2.7906956672668457, "rewards/rejected": -2.873316526412964, "step": 5258 }, { "epoch": 0.61, "learning_rate": 1.1992274376682663e-07, "logits/chosen": -2.2099671363830566, "logits/rejected": -2.3426947593688965, "logps/chosen": -262.30718994140625, "logps/rejected": -304.1112365722656, "loss": 0.8665, "rewards/accuracies": 0.5, "rewards/chosen": -0.5446299314498901, "rewards/margins": 1.161249041557312, "rewards/rejected": -1.7058790922164917, "step": 5259 }, { "epoch": 0.61, "learning_rate": 1.1988762729720239e-07, "logits/chosen": -3.844816207885742, "logits/rejected": -4.067734718322754, "logps/chosen": -145.03819274902344, "logps/rejected": -282.3373718261719, "loss": 0.3722, "rewards/accuracies": 0.875, "rewards/chosen": -0.2555675208568573, "rewards/margins": 3.2537617683410645, "rewards/rejected": -3.509329319000244, "step": 5260 }, { "epoch": 0.61, "learning_rate": 1.1985251082757814e-07, "logits/chosen": -3.0935184955596924, "logits/rejected": -3.371138572692871, "logps/chosen": -281.9754638671875, "logps/rejected": -285.90301513671875, "loss": 0.2597, "rewards/accuracies": 0.875, "rewards/chosen": 0.24172136187553406, "rewards/margins": 2.565387725830078, "rewards/rejected": -2.3236663341522217, "step": 5261 }, { "epoch": 0.61, "learning_rate": 1.1981739435795387e-07, "logits/chosen": -2.8428263664245605, "logits/rejected": -2.8018476963043213, "logps/chosen": -305.2217102050781, "logps/rejected": -300.6449890136719, "loss": 0.5926, "rewards/accuracies": 0.875, "rewards/chosen": -0.7708445191383362, "rewards/margins": 1.2593092918395996, "rewards/rejected": -2.030153751373291, "step": 5262 }, { "epoch": 0.61, "learning_rate": 1.1978227788832962e-07, "logits/chosen": -3.1400656700134277, "logits/rejected": -3.187441110610962, "logps/chosen": -177.19375610351562, "logps/rejected": -205.10202026367188, "loss": 0.2246, "rewards/accuracies": 1.0, "rewards/chosen": 0.10997073352336884, "rewards/margins": 2.0718464851379395, "rewards/rejected": -1.9618757963180542, "step": 5263 }, { "epoch": 0.61, "learning_rate": 1.1974716141870535e-07, "logits/chosen": -2.060303211212158, "logits/rejected": -1.9244282245635986, "logps/chosen": -248.10623168945312, "logps/rejected": -204.86111450195312, "loss": 0.5644, "rewards/accuracies": 0.625, "rewards/chosen": -0.10428924113512039, "rewards/margins": 0.6631040573120117, "rewards/rejected": -0.7673932313919067, "step": 5264 }, { "epoch": 0.61, "learning_rate": 1.1971204494908113e-07, "logits/chosen": -2.9772720336914062, "logits/rejected": -2.8204245567321777, "logps/chosen": -343.1988220214844, "logps/rejected": -278.396728515625, "loss": 0.6047, "rewards/accuracies": 0.625, "rewards/chosen": 0.18444335460662842, "rewards/margins": 0.6080455780029297, "rewards/rejected": -0.4236021637916565, "step": 5265 }, { "epoch": 0.61, "learning_rate": 1.1967692847945686e-07, "logits/chosen": -3.187317132949829, "logits/rejected": -3.0652551651000977, "logps/chosen": -154.0248260498047, "logps/rejected": -182.4474639892578, "loss": 0.3734, "rewards/accuracies": 0.875, "rewards/chosen": -0.024674534797668457, "rewards/margins": 1.5339751243591309, "rewards/rejected": -1.5586495399475098, "step": 5266 }, { "epoch": 0.61, "learning_rate": 1.196418120098326e-07, "logits/chosen": -3.5659375190734863, "logits/rejected": -3.2536399364471436, "logps/chosen": -319.11846923828125, "logps/rejected": -219.90380859375, "loss": 0.3997, "rewards/accuracies": 0.875, "rewards/chosen": -0.20624789595603943, "rewards/margins": 1.5621837377548218, "rewards/rejected": -1.7684316635131836, "step": 5267 }, { "epoch": 0.61, "learning_rate": 1.1960669554020834e-07, "logits/chosen": -2.498556137084961, "logits/rejected": -2.259441614151001, "logps/chosen": -458.2297058105469, "logps/rejected": -367.47039794921875, "loss": 0.1813, "rewards/accuracies": 1.0, "rewards/chosen": 0.08534112572669983, "rewards/margins": 2.304173707962036, "rewards/rejected": -2.218832492828369, "step": 5268 }, { "epoch": 0.61, "learning_rate": 1.1957157907058412e-07, "logits/chosen": -2.806627035140991, "logits/rejected": -2.6817054748535156, "logps/chosen": -282.81439208984375, "logps/rejected": -341.064453125, "loss": 0.3562, "rewards/accuracies": 0.875, "rewards/chosen": -0.02530829608440399, "rewards/margins": 1.363014578819275, "rewards/rejected": -1.3883228302001953, "step": 5269 }, { "epoch": 0.61, "learning_rate": 1.1953646260095984e-07, "logits/chosen": -3.093775987625122, "logits/rejected": -3.0295724868774414, "logps/chosen": -261.1454162597656, "logps/rejected": -266.12225341796875, "loss": 0.4608, "rewards/accuracies": 0.5, "rewards/chosen": -0.04410265386104584, "rewards/margins": 1.73930025100708, "rewards/rejected": -1.7834028005599976, "step": 5270 }, { "epoch": 0.61, "learning_rate": 1.195013461313356e-07, "logits/chosen": -2.2236580848693848, "logits/rejected": -2.1952908039093018, "logps/chosen": -423.15618896484375, "logps/rejected": -427.3813781738281, "loss": 0.4028, "rewards/accuracies": 0.75, "rewards/chosen": 0.30229640007019043, "rewards/margins": 1.8198291063308716, "rewards/rejected": -1.5175325870513916, "step": 5271 }, { "epoch": 0.61, "learning_rate": 1.1946622966171133e-07, "logits/chosen": -3.947577476501465, "logits/rejected": -3.789827346801758, "logps/chosen": -256.69659423828125, "logps/rejected": -223.3997802734375, "loss": 0.4425, "rewards/accuracies": 0.75, "rewards/chosen": -0.11041039973497391, "rewards/margins": 1.3474764823913574, "rewards/rejected": -1.4578869342803955, "step": 5272 }, { "epoch": 0.61, "learning_rate": 1.1943111319208708e-07, "logits/chosen": -3.1630654335021973, "logits/rejected": -3.676985740661621, "logps/chosen": -212.4100341796875, "logps/rejected": -255.36795043945312, "loss": 0.1959, "rewards/accuracies": 1.0, "rewards/chosen": 0.43209031224250793, "rewards/margins": 3.3255698680877686, "rewards/rejected": -2.893479347229004, "step": 5273 }, { "epoch": 0.61, "learning_rate": 1.1939599672246283e-07, "logits/chosen": -2.889444351196289, "logits/rejected": -2.722604274749756, "logps/chosen": -341.0267639160156, "logps/rejected": -287.3748474121094, "loss": 0.3746, "rewards/accuracies": 0.875, "rewards/chosen": -0.677532434463501, "rewards/margins": 2.2087178230285645, "rewards/rejected": -2.8862500190734863, "step": 5274 }, { "epoch": 0.61, "learning_rate": 1.193608802528386e-07, "logits/chosen": -2.539132595062256, "logits/rejected": -2.625786304473877, "logps/chosen": -208.70423889160156, "logps/rejected": -242.12620544433594, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -0.05602100491523743, "rewards/margins": 1.6638544797897339, "rewards/rejected": -1.719875454902649, "step": 5275 }, { "epoch": 0.61, "learning_rate": 1.1932576378321432e-07, "logits/chosen": -2.5883026123046875, "logits/rejected": -2.6176249980926514, "logps/chosen": -244.16883850097656, "logps/rejected": -257.6977233886719, "loss": 0.5963, "rewards/accuracies": 0.75, "rewards/chosen": -0.3560296297073364, "rewards/margins": 2.2562255859375, "rewards/rejected": -2.612255334854126, "step": 5276 }, { "epoch": 0.61, "learning_rate": 1.1929064731359007e-07, "logits/chosen": -2.9746947288513184, "logits/rejected": -2.738182544708252, "logps/chosen": -133.9920654296875, "logps/rejected": -292.6603698730469, "loss": 0.5034, "rewards/accuracies": 0.75, "rewards/chosen": -0.2177162766456604, "rewards/margins": 1.9542315006256104, "rewards/rejected": -2.171947956085205, "step": 5277 }, { "epoch": 0.61, "learning_rate": 1.1925553084396582e-07, "logits/chosen": -3.16219425201416, "logits/rejected": -3.2775774002075195, "logps/chosen": -301.2888488769531, "logps/rejected": -271.2750244140625, "loss": 0.2548, "rewards/accuracies": 0.875, "rewards/chosen": 0.13881921768188477, "rewards/margins": 2.6677494049072266, "rewards/rejected": -2.528930425643921, "step": 5278 }, { "epoch": 0.61, "learning_rate": 1.1922041437434155e-07, "logits/chosen": -3.3047657012939453, "logits/rejected": -3.3548595905303955, "logps/chosen": -239.56040954589844, "logps/rejected": -255.6653289794922, "loss": 0.8374, "rewards/accuracies": 0.625, "rewards/chosen": -0.837917685508728, "rewards/margins": 0.8629946708679199, "rewards/rejected": -1.7009124755859375, "step": 5279 }, { "epoch": 0.61, "learning_rate": 1.191852979047173e-07, "logits/chosen": -3.5522146224975586, "logits/rejected": -3.5528056621551514, "logps/chosen": -212.634521484375, "logps/rejected": -294.1792907714844, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": 0.33663004636764526, "rewards/margins": 2.5563061237335205, "rewards/rejected": -2.2196762561798096, "step": 5280 }, { "epoch": 0.61, "learning_rate": 1.1915018143509306e-07, "logits/chosen": -3.1175241470336914, "logits/rejected": -3.093761920928955, "logps/chosen": -450.05426025390625, "logps/rejected": -337.426025390625, "loss": 0.4192, "rewards/accuracies": 0.75, "rewards/chosen": -0.04596281051635742, "rewards/margins": 2.2999346256256104, "rewards/rejected": -2.3458971977233887, "step": 5281 }, { "epoch": 0.61, "learning_rate": 1.191150649654688e-07, "logits/chosen": -3.022493839263916, "logits/rejected": -3.0333921909332275, "logps/chosen": -142.62327575683594, "logps/rejected": -248.74234008789062, "loss": 0.4135, "rewards/accuracies": 0.75, "rewards/chosen": -0.2471529096364975, "rewards/margins": 1.4975841045379639, "rewards/rejected": -1.744737148284912, "step": 5282 }, { "epoch": 0.61, "learning_rate": 1.1907994849584454e-07, "logits/chosen": -3.1336073875427246, "logits/rejected": -2.5719711780548096, "logps/chosen": -368.74749755859375, "logps/rejected": -241.32363891601562, "loss": 0.419, "rewards/accuracies": 0.875, "rewards/chosen": -0.8870683908462524, "rewards/margins": 1.1168632507324219, "rewards/rejected": -2.003931760787964, "step": 5283 }, { "epoch": 0.61, "learning_rate": 1.1904483202622029e-07, "logits/chosen": -3.1582255363464355, "logits/rejected": -3.1383910179138184, "logps/chosen": -270.9600830078125, "logps/rejected": -328.2778015136719, "loss": 0.308, "rewards/accuracies": 0.75, "rewards/chosen": -0.34702032804489136, "rewards/margins": 2.6627840995788574, "rewards/rejected": -3.0098042488098145, "step": 5284 }, { "epoch": 0.61, "learning_rate": 1.1900971555659605e-07, "logits/chosen": -3.577345848083496, "logits/rejected": -3.2390990257263184, "logps/chosen": -249.53189086914062, "logps/rejected": -143.78677368164062, "loss": 0.2213, "rewards/accuracies": 1.0, "rewards/chosen": -0.2536546587944031, "rewards/margins": 1.6147410869598389, "rewards/rejected": -1.8683958053588867, "step": 5285 }, { "epoch": 0.61, "learning_rate": 1.1897459908697179e-07, "logits/chosen": -3.1280903816223145, "logits/rejected": -3.047313690185547, "logps/chosen": -223.62655639648438, "logps/rejected": -235.99679565429688, "loss": 0.6442, "rewards/accuracies": 0.625, "rewards/chosen": -0.6268102526664734, "rewards/margins": 1.199461817741394, "rewards/rejected": -1.8262720108032227, "step": 5286 }, { "epoch": 0.61, "learning_rate": 1.1893948261734753e-07, "logits/chosen": -3.4842052459716797, "logits/rejected": -2.7985804080963135, "logps/chosen": -252.07977294921875, "logps/rejected": -250.7867431640625, "loss": 0.2884, "rewards/accuracies": 1.0, "rewards/chosen": -0.41090714931488037, "rewards/margins": 1.8253885507583618, "rewards/rejected": -2.236295700073242, "step": 5287 }, { "epoch": 0.61, "learning_rate": 1.1890436614772327e-07, "logits/chosen": -3.4553141593933105, "logits/rejected": -3.8400914669036865, "logps/chosen": -238.926025390625, "logps/rejected": -483.93255615234375, "loss": 0.2362, "rewards/accuracies": 0.875, "rewards/chosen": 0.08708344399929047, "rewards/margins": 2.877342939376831, "rewards/rejected": -2.790259599685669, "step": 5288 }, { "epoch": 0.61, "learning_rate": 1.1886924967809901e-07, "logits/chosen": -3.441469192504883, "logits/rejected": -3.3018500804901123, "logps/chosen": -265.0951232910156, "logps/rejected": -172.10955810546875, "loss": 0.8662, "rewards/accuracies": 0.375, "rewards/chosen": -0.5917856097221375, "rewards/margins": 0.12730883061885834, "rewards/rejected": -0.7190945148468018, "step": 5289 }, { "epoch": 0.61, "learning_rate": 1.1883413320847478e-07, "logits/chosen": -3.458289623260498, "logits/rejected": -3.163661003112793, "logps/chosen": -301.8624267578125, "logps/rejected": -285.4290771484375, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": 0.2466646134853363, "rewards/margins": 2.864305019378662, "rewards/rejected": -2.617640256881714, "step": 5290 }, { "epoch": 0.61, "learning_rate": 1.1879901673885052e-07, "logits/chosen": -3.9664559364318848, "logits/rejected": -3.89241886138916, "logps/chosen": -218.73037719726562, "logps/rejected": -169.672607421875, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": -0.3425738513469696, "rewards/margins": 1.9347800016403198, "rewards/rejected": -2.2773537635803223, "step": 5291 }, { "epoch": 0.61, "learning_rate": 1.1876390026922626e-07, "logits/chosen": -2.964613199234009, "logits/rejected": -2.864636182785034, "logps/chosen": -209.5089569091797, "logps/rejected": -266.8509216308594, "loss": 0.5364, "rewards/accuracies": 0.75, "rewards/chosen": -0.4176611304283142, "rewards/margins": 1.1993352174758911, "rewards/rejected": -1.61699640750885, "step": 5292 }, { "epoch": 0.61, "learning_rate": 1.18728783799602e-07, "logits/chosen": -3.19462251663208, "logits/rejected": -3.375669479370117, "logps/chosen": -165.86605834960938, "logps/rejected": -169.89500427246094, "loss": 0.4917, "rewards/accuracies": 0.75, "rewards/chosen": 0.19701166450977325, "rewards/margins": 1.4314093589782715, "rewards/rejected": -1.2343976497650146, "step": 5293 }, { "epoch": 0.61, "learning_rate": 1.1869366732997777e-07, "logits/chosen": -2.4611167907714844, "logits/rejected": -2.904947519302368, "logps/chosen": -150.60958862304688, "logps/rejected": -286.30218505859375, "loss": 0.5097, "rewards/accuracies": 0.75, "rewards/chosen": -0.1646120548248291, "rewards/margins": 1.449049711227417, "rewards/rejected": -1.613661766052246, "step": 5294 }, { "epoch": 0.61, "learning_rate": 1.186585508603535e-07, "logits/chosen": -2.7439794540405273, "logits/rejected": -2.790468692779541, "logps/chosen": -117.99046325683594, "logps/rejected": -242.6410675048828, "loss": 0.3835, "rewards/accuracies": 0.75, "rewards/chosen": -0.16659428179264069, "rewards/margins": 1.763291358947754, "rewards/rejected": -1.929885745048523, "step": 5295 }, { "epoch": 0.61, "learning_rate": 1.1862343439072925e-07, "logits/chosen": -2.7106122970581055, "logits/rejected": -2.639359951019287, "logps/chosen": -336.0431213378906, "logps/rejected": -280.45977783203125, "loss": 0.5486, "rewards/accuracies": 0.75, "rewards/chosen": -0.03731174021959305, "rewards/margins": 1.3584873676300049, "rewards/rejected": -1.395799160003662, "step": 5296 }, { "epoch": 0.61, "learning_rate": 1.1858831792110499e-07, "logits/chosen": -3.347933053970337, "logits/rejected": -3.309882402420044, "logps/chosen": -217.00970458984375, "logps/rejected": -189.84866333007812, "loss": 0.3179, "rewards/accuracies": 0.875, "rewards/chosen": -0.19347253441810608, "rewards/margins": 1.369217872619629, "rewards/rejected": -1.5626904964447021, "step": 5297 }, { "epoch": 0.61, "learning_rate": 1.1855320145148074e-07, "logits/chosen": -3.3293817043304443, "logits/rejected": -2.966381549835205, "logps/chosen": -204.59609985351562, "logps/rejected": -258.4775390625, "loss": 0.5751, "rewards/accuracies": 0.75, "rewards/chosen": -0.3843788206577301, "rewards/margins": 0.7772958874702454, "rewards/rejected": -1.1616747379302979, "step": 5298 }, { "epoch": 0.61, "learning_rate": 1.1851808498185648e-07, "logits/chosen": -3.2447218894958496, "logits/rejected": -3.351438522338867, "logps/chosen": -185.56190490722656, "logps/rejected": -254.40341186523438, "loss": 0.3343, "rewards/accuracies": 0.875, "rewards/chosen": -0.41642457246780396, "rewards/margins": 2.1154158115386963, "rewards/rejected": -2.5318403244018555, "step": 5299 }, { "epoch": 0.61, "learning_rate": 1.1848296851223222e-07, "logits/chosen": -3.2801432609558105, "logits/rejected": -2.935929298400879, "logps/chosen": -265.004150390625, "logps/rejected": -269.19580078125, "loss": 0.751, "rewards/accuracies": 0.625, "rewards/chosen": -0.111660435795784, "rewards/margins": 1.7392542362213135, "rewards/rejected": -1.8509145975112915, "step": 5300 }, { "epoch": 0.61, "learning_rate": 1.1844785204260798e-07, "logits/chosen": -3.5987958908081055, "logits/rejected": -3.50144362449646, "logps/chosen": -234.84075927734375, "logps/rejected": -238.41683959960938, "loss": 0.3501, "rewards/accuracies": 0.75, "rewards/chosen": 0.24521403014659882, "rewards/margins": 1.6754508018493652, "rewards/rejected": -1.4302366971969604, "step": 5301 }, { "epoch": 0.61, "learning_rate": 1.1841273557298373e-07, "logits/chosen": -3.4161148071289062, "logits/rejected": -3.103753089904785, "logps/chosen": -136.54122924804688, "logps/rejected": -205.8348388671875, "loss": 0.9316, "rewards/accuracies": 0.5, "rewards/chosen": -0.5553924441337585, "rewards/margins": 0.5703779458999634, "rewards/rejected": -1.1257704496383667, "step": 5302 }, { "epoch": 0.61, "learning_rate": 1.1837761910335947e-07, "logits/chosen": -3.3338518142700195, "logits/rejected": -3.2054007053375244, "logps/chosen": -472.4631652832031, "logps/rejected": -259.8365478515625, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": 0.5355492830276489, "rewards/margins": 2.5621094703674316, "rewards/rejected": -2.0265605449676514, "step": 5303 }, { "epoch": 0.61, "learning_rate": 1.1834250263373521e-07, "logits/chosen": -3.085245132446289, "logits/rejected": -3.0464587211608887, "logps/chosen": -454.04937744140625, "logps/rejected": -367.3216857910156, "loss": 0.4047, "rewards/accuracies": 0.875, "rewards/chosen": -0.22949331998825073, "rewards/margins": 1.540716528892517, "rewards/rejected": -1.770209789276123, "step": 5304 }, { "epoch": 0.61, "learning_rate": 1.1830738616411095e-07, "logits/chosen": -3.0679941177368164, "logits/rejected": -2.881873846054077, "logps/chosen": -376.23602294921875, "logps/rejected": -266.7079162597656, "loss": 0.3172, "rewards/accuracies": 0.875, "rewards/chosen": -0.2813473641872406, "rewards/margins": 2.0690431594848633, "rewards/rejected": -2.3503904342651367, "step": 5305 }, { "epoch": 0.61, "learning_rate": 1.1827226969448672e-07, "logits/chosen": -3.325892925262451, "logits/rejected": -3.5403671264648438, "logps/chosen": -146.9102783203125, "logps/rejected": -244.6565704345703, "loss": 0.32, "rewards/accuracies": 0.75, "rewards/chosen": -0.24200797080993652, "rewards/margins": 2.2373180389404297, "rewards/rejected": -2.479326009750366, "step": 5306 }, { "epoch": 0.61, "learning_rate": 1.1823715322486246e-07, "logits/chosen": -3.0236501693725586, "logits/rejected": -3.1799793243408203, "logps/chosen": -194.70164489746094, "logps/rejected": -193.9634552001953, "loss": 0.3651, "rewards/accuracies": 0.875, "rewards/chosen": 0.16030502319335938, "rewards/margins": 2.7424092292785645, "rewards/rejected": -2.582104444503784, "step": 5307 }, { "epoch": 0.61, "learning_rate": 1.182020367552382e-07, "logits/chosen": -3.430779457092285, "logits/rejected": -3.5485873222351074, "logps/chosen": -215.32264709472656, "logps/rejected": -279.6903991699219, "loss": 0.6442, "rewards/accuracies": 0.625, "rewards/chosen": -0.5702784061431885, "rewards/margins": 0.4695984125137329, "rewards/rejected": -1.0398766994476318, "step": 5308 }, { "epoch": 0.61, "learning_rate": 1.1816692028561394e-07, "logits/chosen": -3.5047433376312256, "logits/rejected": -3.1535704135894775, "logps/chosen": -475.0372314453125, "logps/rejected": -330.7916564941406, "loss": 0.2436, "rewards/accuracies": 0.875, "rewards/chosen": -0.1843947172164917, "rewards/margins": 3.481719732284546, "rewards/rejected": -3.666114568710327, "step": 5309 }, { "epoch": 0.61, "learning_rate": 1.181318038159897e-07, "logits/chosen": -2.5618462562561035, "logits/rejected": -2.5857903957366943, "logps/chosen": -343.82763671875, "logps/rejected": -261.24713134765625, "loss": 0.4493, "rewards/accuracies": 0.875, "rewards/chosen": -0.1268748790025711, "rewards/margins": 0.7392730712890625, "rewards/rejected": -0.8661479353904724, "step": 5310 }, { "epoch": 0.61, "learning_rate": 1.1809668734636545e-07, "logits/chosen": -2.926823139190674, "logits/rejected": -3.2674179077148438, "logps/chosen": -274.2376403808594, "logps/rejected": -322.9490966796875, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": 1.246821641921997, "rewards/margins": 3.441399574279785, "rewards/rejected": -2.194577693939209, "step": 5311 }, { "epoch": 0.61, "learning_rate": 1.1806157087674119e-07, "logits/chosen": -2.785946846008301, "logits/rejected": -3.198619842529297, "logps/chosen": -347.98626708984375, "logps/rejected": -524.7647705078125, "loss": 0.6095, "rewards/accuracies": 0.5, "rewards/chosen": -0.38966768980026245, "rewards/margins": 1.7886133193969727, "rewards/rejected": -2.178280830383301, "step": 5312 }, { "epoch": 0.61, "learning_rate": 1.1802645440711693e-07, "logits/chosen": -2.4815726280212402, "logits/rejected": -2.538074016571045, "logps/chosen": -125.90350341796875, "logps/rejected": -125.7213134765625, "loss": 0.3306, "rewards/accuracies": 1.0, "rewards/chosen": -0.16973868012428284, "rewards/margins": 1.1876661777496338, "rewards/rejected": -1.3574048280715942, "step": 5313 }, { "epoch": 0.61, "learning_rate": 1.1799133793749268e-07, "logits/chosen": -3.2826974391937256, "logits/rejected": -3.6301088333129883, "logps/chosen": -465.04888916015625, "logps/rejected": -274.7007141113281, "loss": 0.5097, "rewards/accuracies": 0.875, "rewards/chosen": -0.5384688377380371, "rewards/margins": 0.7397345304489136, "rewards/rejected": -1.2782033681869507, "step": 5314 }, { "epoch": 0.61, "learning_rate": 1.1795622146786842e-07, "logits/chosen": -3.179577350616455, "logits/rejected": -3.2284016609191895, "logps/chosen": -307.47021484375, "logps/rejected": -299.35601806640625, "loss": 0.5358, "rewards/accuracies": 0.75, "rewards/chosen": 0.043544963002204895, "rewards/margins": 1.4730011224746704, "rewards/rejected": -1.429456114768982, "step": 5315 }, { "epoch": 0.61, "learning_rate": 1.1792110499824416e-07, "logits/chosen": -3.4529571533203125, "logits/rejected": -3.3142149448394775, "logps/chosen": -260.7003479003906, "logps/rejected": -185.76760864257812, "loss": 0.4209, "rewards/accuracies": 0.75, "rewards/chosen": -0.38796547055244446, "rewards/margins": 1.4888010025024414, "rewards/rejected": -1.876766324043274, "step": 5316 }, { "epoch": 0.61, "learning_rate": 1.178859885286199e-07, "logits/chosen": -3.271704912185669, "logits/rejected": -3.2487998008728027, "logps/chosen": -238.91781616210938, "logps/rejected": -226.2290496826172, "loss": 0.5238, "rewards/accuracies": 0.875, "rewards/chosen": -0.16189655661582947, "rewards/margins": 1.5519782304763794, "rewards/rejected": -1.7138746976852417, "step": 5317 }, { "epoch": 0.61, "learning_rate": 1.1785087205899567e-07, "logits/chosen": -2.915142059326172, "logits/rejected": -2.652663469314575, "logps/chosen": -278.468017578125, "logps/rejected": -192.33761596679688, "loss": 0.5338, "rewards/accuracies": 0.875, "rewards/chosen": -0.5215287804603577, "rewards/margins": 1.0504300594329834, "rewards/rejected": -1.5719587802886963, "step": 5318 }, { "epoch": 0.61, "learning_rate": 1.1781575558937141e-07, "logits/chosen": -3.5180277824401855, "logits/rejected": -3.282268524169922, "logps/chosen": -241.83883666992188, "logps/rejected": -200.3997344970703, "loss": 0.2812, "rewards/accuracies": 0.875, "rewards/chosen": 0.15620733797550201, "rewards/margins": 1.6773450374603271, "rewards/rejected": -1.5211377143859863, "step": 5319 }, { "epoch": 0.61, "learning_rate": 1.1778063911974715e-07, "logits/chosen": -3.0525553226470947, "logits/rejected": -3.3098504543304443, "logps/chosen": -201.21319580078125, "logps/rejected": -275.56060791015625, "loss": 0.3635, "rewards/accuracies": 0.875, "rewards/chosen": -0.3383446931838989, "rewards/margins": 3.3931355476379395, "rewards/rejected": -3.731480598449707, "step": 5320 }, { "epoch": 0.61, "learning_rate": 1.177455226501229e-07, "logits/chosen": -2.9487924575805664, "logits/rejected": -2.9715561866760254, "logps/chosen": -362.1551513671875, "logps/rejected": -333.4438171386719, "loss": 0.5509, "rewards/accuracies": 0.625, "rewards/chosen": -0.14812664687633514, "rewards/margins": 1.1062774658203125, "rewards/rejected": -1.254404067993164, "step": 5321 }, { "epoch": 0.61, "learning_rate": 1.1771040618049866e-07, "logits/chosen": -3.723816394805908, "logits/rejected": -3.8011326789855957, "logps/chosen": -373.8204650878906, "logps/rejected": -288.3244934082031, "loss": 0.5041, "rewards/accuracies": 0.75, "rewards/chosen": -0.5491907596588135, "rewards/margins": 1.336431622505188, "rewards/rejected": -1.885622501373291, "step": 5322 }, { "epoch": 0.61, "learning_rate": 1.176752897108744e-07, "logits/chosen": -2.927650213241577, "logits/rejected": -2.8360512256622314, "logps/chosen": -284.93756103515625, "logps/rejected": -205.94961547851562, "loss": 0.2744, "rewards/accuracies": 0.875, "rewards/chosen": 0.3633912205696106, "rewards/margins": 1.5899953842163086, "rewards/rejected": -1.2266043424606323, "step": 5323 }, { "epoch": 0.61, "learning_rate": 1.1764017324125014e-07, "logits/chosen": -3.083552598953247, "logits/rejected": -3.436340808868408, "logps/chosen": -356.75830078125, "logps/rejected": -297.759765625, "loss": 0.6191, "rewards/accuracies": 0.75, "rewards/chosen": 0.2587321400642395, "rewards/margins": 0.8752649426460266, "rewards/rejected": -0.6165328025817871, "step": 5324 }, { "epoch": 0.61, "learning_rate": 1.1760505677162588e-07, "logits/chosen": -2.9998779296875, "logits/rejected": -2.7792530059814453, "logps/chosen": -282.32183837890625, "logps/rejected": -268.5966796875, "loss": 0.3836, "rewards/accuracies": 0.75, "rewards/chosen": 0.14703623950481415, "rewards/margins": 3.291665554046631, "rewards/rejected": -3.1446292400360107, "step": 5325 }, { "epoch": 0.61, "learning_rate": 1.1756994030200164e-07, "logits/chosen": -2.91245698928833, "logits/rejected": -2.907775402069092, "logps/chosen": -320.2054443359375, "logps/rejected": -227.05935668945312, "loss": 0.189, "rewards/accuracies": 1.0, "rewards/chosen": -0.1989031285047531, "rewards/margins": 1.9366915225982666, "rewards/rejected": -2.135594367980957, "step": 5326 }, { "epoch": 0.61, "learning_rate": 1.1753482383237738e-07, "logits/chosen": -2.9528565406799316, "logits/rejected": -3.032849073410034, "logps/chosen": -183.32455444335938, "logps/rejected": -265.6575622558594, "loss": 0.3302, "rewards/accuracies": 0.875, "rewards/chosen": -0.3145020604133606, "rewards/margins": 1.8275034427642822, "rewards/rejected": -2.142005443572998, "step": 5327 }, { "epoch": 0.61, "learning_rate": 1.1749970736275313e-07, "logits/chosen": -3.0894155502319336, "logits/rejected": -2.8906004428863525, "logps/chosen": -362.88482666015625, "logps/rejected": -428.5622863769531, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": 0.331216424703598, "rewards/margins": 3.1433463096618652, "rewards/rejected": -2.8121302127838135, "step": 5328 }, { "epoch": 0.61, "learning_rate": 1.1746459089312887e-07, "logits/chosen": -2.402616500854492, "logits/rejected": -2.1959052085876465, "logps/chosen": -321.28521728515625, "logps/rejected": -364.29913330078125, "loss": 0.3752, "rewards/accuracies": 0.875, "rewards/chosen": -0.762101411819458, "rewards/margins": 2.0562899112701416, "rewards/rejected": -2.8183910846710205, "step": 5329 }, { "epoch": 0.61, "learning_rate": 1.1742947442350463e-07, "logits/chosen": -3.1478371620178223, "logits/rejected": -2.9943008422851562, "logps/chosen": -351.0337219238281, "logps/rejected": -258.2586364746094, "loss": 0.253, "rewards/accuracies": 1.0, "rewards/chosen": -0.6636461019515991, "rewards/margins": 1.955299973487854, "rewards/rejected": -2.618946075439453, "step": 5330 }, { "epoch": 0.61, "learning_rate": 1.1739435795388037e-07, "logits/chosen": -3.491133213043213, "logits/rejected": -3.777883529663086, "logps/chosen": -135.02462768554688, "logps/rejected": -318.19952392578125, "loss": 0.3777, "rewards/accuracies": 0.875, "rewards/chosen": 0.015131175518035889, "rewards/margins": 3.5479683876037598, "rewards/rejected": -3.532837152481079, "step": 5331 }, { "epoch": 0.61, "learning_rate": 1.1735924148425611e-07, "logits/chosen": -3.3519742488861084, "logits/rejected": -3.6445095539093018, "logps/chosen": -180.16610717773438, "logps/rejected": -304.76409912109375, "loss": 0.5737, "rewards/accuracies": 0.75, "rewards/chosen": -0.8437068462371826, "rewards/margins": 4.059483528137207, "rewards/rejected": -4.903190612792969, "step": 5332 }, { "epoch": 0.61, "learning_rate": 1.1732412501463185e-07, "logits/chosen": -3.0656003952026367, "logits/rejected": -3.0494801998138428, "logps/chosen": -284.759033203125, "logps/rejected": -347.4619140625, "loss": 0.3902, "rewards/accuracies": 0.75, "rewards/chosen": -0.288232684135437, "rewards/margins": 1.8624756336212158, "rewards/rejected": -2.1507084369659424, "step": 5333 }, { "epoch": 0.61, "learning_rate": 1.1728900854500759e-07, "logits/chosen": -3.800502300262451, "logits/rejected": -3.53464937210083, "logps/chosen": -287.4187316894531, "logps/rejected": -187.14830017089844, "loss": 0.3117, "rewards/accuracies": 0.875, "rewards/chosen": -0.46296465396881104, "rewards/margins": 2.2086329460144043, "rewards/rejected": -2.671597719192505, "step": 5334 }, { "epoch": 0.62, "learning_rate": 1.1725389207538336e-07, "logits/chosen": -2.9256017208099365, "logits/rejected": -2.8205676078796387, "logps/chosen": -178.267578125, "logps/rejected": -233.16204833984375, "loss": 0.3904, "rewards/accuracies": 0.875, "rewards/chosen": -0.328305721282959, "rewards/margins": 2.335015296936035, "rewards/rejected": -2.663321018218994, "step": 5335 }, { "epoch": 0.62, "learning_rate": 1.172187756057591e-07, "logits/chosen": -3.3814122676849365, "logits/rejected": -3.501513719558716, "logps/chosen": -121.938720703125, "logps/rejected": -173.59361267089844, "loss": 0.6302, "rewards/accuracies": 0.5, "rewards/chosen": -0.23246164619922638, "rewards/margins": 0.9932323694229126, "rewards/rejected": -1.2256940603256226, "step": 5336 }, { "epoch": 0.62, "learning_rate": 1.1718365913613484e-07, "logits/chosen": -3.236764907836914, "logits/rejected": -3.1434755325317383, "logps/chosen": -236.4114990234375, "logps/rejected": -311.14337158203125, "loss": 0.1755, "rewards/accuracies": 1.0, "rewards/chosen": -0.273426353931427, "rewards/margins": 2.69964861869812, "rewards/rejected": -2.9730751514434814, "step": 5337 }, { "epoch": 0.62, "learning_rate": 1.1714854266651058e-07, "logits/chosen": -3.455944538116455, "logits/rejected": -3.6186256408691406, "logps/chosen": -176.22402954101562, "logps/rejected": -181.53271484375, "loss": 0.366, "rewards/accuracies": 0.875, "rewards/chosen": 0.14881527423858643, "rewards/margins": 1.9560400247573853, "rewards/rejected": -1.8072246313095093, "step": 5338 }, { "epoch": 0.62, "learning_rate": 1.1711342619688634e-07, "logits/chosen": -3.065885543823242, "logits/rejected": -3.2828285694122314, "logps/chosen": -191.8654327392578, "logps/rejected": -170.66455078125, "loss": 0.6744, "rewards/accuracies": 0.5, "rewards/chosen": -0.3719821572303772, "rewards/margins": 0.31412550806999207, "rewards/rejected": -0.6861076354980469, "step": 5339 }, { "epoch": 0.62, "learning_rate": 1.1707830972726209e-07, "logits/chosen": -3.1119391918182373, "logits/rejected": -2.972588062286377, "logps/chosen": -284.97113037109375, "logps/rejected": -236.74998474121094, "loss": 0.184, "rewards/accuracies": 1.0, "rewards/chosen": 0.426011323928833, "rewards/margins": 2.314789295196533, "rewards/rejected": -1.8887779712677002, "step": 5340 }, { "epoch": 0.62, "learning_rate": 1.1704319325763783e-07, "logits/chosen": -3.187002182006836, "logits/rejected": -3.2886123657226562, "logps/chosen": -204.85812377929688, "logps/rejected": -185.04075622558594, "loss": 0.3148, "rewards/accuracies": 1.0, "rewards/chosen": 0.06176924705505371, "rewards/margins": 1.9781360626220703, "rewards/rejected": -1.9163668155670166, "step": 5341 }, { "epoch": 0.62, "learning_rate": 1.1700807678801357e-07, "logits/chosen": -2.8209750652313232, "logits/rejected": -2.702106237411499, "logps/chosen": -205.88223266601562, "logps/rejected": -136.56605529785156, "loss": 0.8899, "rewards/accuracies": 0.5, "rewards/chosen": -0.5664583444595337, "rewards/margins": 0.4206068813800812, "rewards/rejected": -0.9870651960372925, "step": 5342 }, { "epoch": 0.62, "learning_rate": 1.1697296031838932e-07, "logits/chosen": -2.800090789794922, "logits/rejected": -3.3524303436279297, "logps/chosen": -218.87380981445312, "logps/rejected": -282.2682189941406, "loss": 0.488, "rewards/accuracies": 0.75, "rewards/chosen": 0.16046997904777527, "rewards/margins": 1.9108846187591553, "rewards/rejected": -1.7504147291183472, "step": 5343 }, { "epoch": 0.62, "learning_rate": 1.1693784384876506e-07, "logits/chosen": -3.130704879760742, "logits/rejected": -2.9810471534729004, "logps/chosen": -357.56427001953125, "logps/rejected": -426.0700988769531, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": 0.24282976984977722, "rewards/margins": 2.745765447616577, "rewards/rejected": -2.5029354095458984, "step": 5344 }, { "epoch": 0.62, "learning_rate": 1.1690272737914081e-07, "logits/chosen": -2.4825663566589355, "logits/rejected": -2.5842790603637695, "logps/chosen": -271.2116394042969, "logps/rejected": -171.34786987304688, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": 0.5077495574951172, "rewards/margins": 1.6565362215042114, "rewards/rejected": -1.1487867832183838, "step": 5345 }, { "epoch": 0.62, "learning_rate": 1.1686761090951656e-07, "logits/chosen": -3.0953383445739746, "logits/rejected": -2.850071430206299, "logps/chosen": -435.32421875, "logps/rejected": -388.5845031738281, "loss": 0.3082, "rewards/accuracies": 0.875, "rewards/chosen": -0.1500154435634613, "rewards/margins": 1.8172104358673096, "rewards/rejected": -1.9672259092330933, "step": 5346 }, { "epoch": 0.62, "learning_rate": 1.1683249443989231e-07, "logits/chosen": -2.1106648445129395, "logits/rejected": -2.185969591140747, "logps/chosen": -442.5934143066406, "logps/rejected": -398.11981201171875, "loss": 0.595, "rewards/accuracies": 0.875, "rewards/chosen": -0.12288632243871689, "rewards/margins": 0.7967591881752014, "rewards/rejected": -0.9196454882621765, "step": 5347 }, { "epoch": 0.62, "learning_rate": 1.1679737797026805e-07, "logits/chosen": -2.9698996543884277, "logits/rejected": -2.7327969074249268, "logps/chosen": -237.7808837890625, "logps/rejected": -211.74642944335938, "loss": 0.5587, "rewards/accuracies": 0.75, "rewards/chosen": 0.001416921615600586, "rewards/margins": 1.8736257553100586, "rewards/rejected": -1.8722089529037476, "step": 5348 }, { "epoch": 0.62, "learning_rate": 1.1676226150064379e-07, "logits/chosen": -2.8923890590667725, "logits/rejected": -2.6324353218078613, "logps/chosen": -195.33990478515625, "logps/rejected": -187.3834228515625, "loss": 0.5373, "rewards/accuracies": 0.5, "rewards/chosen": -0.26573634147644043, "rewards/margins": 1.328852653503418, "rewards/rejected": -1.5945888757705688, "step": 5349 }, { "epoch": 0.62, "learning_rate": 1.1672714503101953e-07, "logits/chosen": -3.5589406490325928, "logits/rejected": -3.4424633979797363, "logps/chosen": -211.50296020507812, "logps/rejected": -217.49502563476562, "loss": 0.6954, "rewards/accuracies": 0.5, "rewards/chosen": -0.9044696092605591, "rewards/margins": 0.6498827338218689, "rewards/rejected": -1.5543524026870728, "step": 5350 }, { "epoch": 0.62, "learning_rate": 1.166920285613953e-07, "logits/chosen": -3.0086312294006348, "logits/rejected": -2.9488587379455566, "logps/chosen": -391.87847900390625, "logps/rejected": -321.02691650390625, "loss": 0.3231, "rewards/accuracies": 0.875, "rewards/chosen": 0.49690067768096924, "rewards/margins": 1.2262020111083984, "rewards/rejected": -0.729301393032074, "step": 5351 }, { "epoch": 0.62, "learning_rate": 1.1665691209177104e-07, "logits/chosen": -2.7800304889678955, "logits/rejected": -2.7538325786590576, "logps/chosen": -288.1872253417969, "logps/rejected": -257.7411804199219, "loss": 0.7158, "rewards/accuracies": 0.5, "rewards/chosen": -0.3013516962528229, "rewards/margins": 0.32236841320991516, "rewards/rejected": -0.623720109462738, "step": 5352 }, { "epoch": 0.62, "learning_rate": 1.1662179562214678e-07, "logits/chosen": -2.281803607940674, "logits/rejected": -2.277806282043457, "logps/chosen": -473.61590576171875, "logps/rejected": -395.5416564941406, "loss": 0.5023, "rewards/accuracies": 0.75, "rewards/chosen": 0.5726956129074097, "rewards/margins": 0.8667303919792175, "rewards/rejected": -0.29403483867645264, "step": 5353 }, { "epoch": 0.62, "learning_rate": 1.1658667915252252e-07, "logits/chosen": -2.492711067199707, "logits/rejected": -2.419447183609009, "logps/chosen": -412.77294921875, "logps/rejected": -319.8740539550781, "loss": 0.601, "rewards/accuracies": 0.625, "rewards/chosen": -0.29719799757003784, "rewards/margins": 0.5715266466140747, "rewards/rejected": -0.8687245845794678, "step": 5354 }, { "epoch": 0.62, "learning_rate": 1.1655156268289827e-07, "logits/chosen": -3.4408748149871826, "logits/rejected": -3.4335925579071045, "logps/chosen": -359.4884033203125, "logps/rejected": -282.474365234375, "loss": 0.9117, "rewards/accuracies": 0.625, "rewards/chosen": -1.1333894729614258, "rewards/margins": 0.7807798385620117, "rewards/rejected": -1.914169430732727, "step": 5355 }, { "epoch": 0.62, "learning_rate": 1.1651644621327403e-07, "logits/chosen": -2.9157872200012207, "logits/rejected": -2.74361515045166, "logps/chosen": -266.62646484375, "logps/rejected": -285.52301025390625, "loss": 0.3993, "rewards/accuracies": 0.75, "rewards/chosen": -0.2488541603088379, "rewards/margins": 2.2976036071777344, "rewards/rejected": -2.546457529067993, "step": 5356 }, { "epoch": 0.62, "learning_rate": 1.1648132974364977e-07, "logits/chosen": -2.7847070693969727, "logits/rejected": -3.3674535751342773, "logps/chosen": -134.5704345703125, "logps/rejected": -188.64144897460938, "loss": 0.1762, "rewards/accuracies": 1.0, "rewards/chosen": 0.6167689561843872, "rewards/margins": 2.695558547973633, "rewards/rejected": -2.078789710998535, "step": 5357 }, { "epoch": 0.62, "learning_rate": 1.1644621327402551e-07, "logits/chosen": -2.9598584175109863, "logits/rejected": -2.8595986366271973, "logps/chosen": -261.0899658203125, "logps/rejected": -238.88121032714844, "loss": 0.3561, "rewards/accuracies": 0.875, "rewards/chosen": -0.16842323541641235, "rewards/margins": 1.4701862335205078, "rewards/rejected": -1.6386094093322754, "step": 5358 }, { "epoch": 0.62, "learning_rate": 1.1641109680440126e-07, "logits/chosen": -3.641542911529541, "logits/rejected": -3.7010583877563477, "logps/chosen": -171.93292236328125, "logps/rejected": -216.55474853515625, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -0.20446255803108215, "rewards/margins": 1.739989995956421, "rewards/rejected": -1.9444525241851807, "step": 5359 }, { "epoch": 0.62, "learning_rate": 1.16375980334777e-07, "logits/chosen": -2.7971901893615723, "logits/rejected": -2.84483003616333, "logps/chosen": -293.93487548828125, "logps/rejected": -276.29827880859375, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -0.4803926944732666, "rewards/margins": 2.2325050830841064, "rewards/rejected": -2.712897539138794, "step": 5360 }, { "epoch": 0.62, "learning_rate": 1.1634086386515274e-07, "logits/chosen": -3.4643607139587402, "logits/rejected": -3.2944631576538086, "logps/chosen": -175.54051208496094, "logps/rejected": -193.58775329589844, "loss": 0.4159, "rewards/accuracies": 0.75, "rewards/chosen": -0.32840365171432495, "rewards/margins": 2.3999476432800293, "rewards/rejected": -2.728351354598999, "step": 5361 }, { "epoch": 0.62, "learning_rate": 1.163057473955285e-07, "logits/chosen": -3.506871223449707, "logits/rejected": -3.2820370197296143, "logps/chosen": -201.41183471679688, "logps/rejected": -213.82748413085938, "loss": 0.5613, "rewards/accuracies": 0.625, "rewards/chosen": -0.7490542531013489, "rewards/margins": 0.6519057154655457, "rewards/rejected": -1.4009599685668945, "step": 5362 }, { "epoch": 0.62, "learning_rate": 1.1627063092590425e-07, "logits/chosen": -3.064243793487549, "logits/rejected": -2.6860570907592773, "logps/chosen": -170.38438415527344, "logps/rejected": -225.47384643554688, "loss": 0.5712, "rewards/accuracies": 0.75, "rewards/chosen": -0.34198832511901855, "rewards/margins": 1.3039499521255493, "rewards/rejected": -1.6459382772445679, "step": 5363 }, { "epoch": 0.62, "learning_rate": 1.1623551445627999e-07, "logits/chosen": -4.304924964904785, "logits/rejected": -3.9734325408935547, "logps/chosen": -380.6185607910156, "logps/rejected": -189.29541015625, "loss": 0.3436, "rewards/accuracies": 0.75, "rewards/chosen": -0.11818194389343262, "rewards/margins": 1.7870562076568604, "rewards/rejected": -1.9052382707595825, "step": 5364 }, { "epoch": 0.62, "learning_rate": 1.1620039798665573e-07, "logits/chosen": -2.360538959503174, "logits/rejected": -2.377342700958252, "logps/chosen": -313.07025146484375, "logps/rejected": -407.6640625, "loss": 0.4556, "rewards/accuracies": 0.625, "rewards/chosen": -0.6989781260490417, "rewards/margins": 1.9685337543487549, "rewards/rejected": -2.6675119400024414, "step": 5365 }, { "epoch": 0.62, "learning_rate": 1.1616528151703147e-07, "logits/chosen": -3.8501389026641846, "logits/rejected": -3.4594995975494385, "logps/chosen": -238.63307189941406, "logps/rejected": -188.6956329345703, "loss": 0.5013, "rewards/accuracies": 0.5, "rewards/chosen": -0.6884474158287048, "rewards/margins": 1.5536322593688965, "rewards/rejected": -2.242079734802246, "step": 5366 }, { "epoch": 0.62, "learning_rate": 1.1613016504740724e-07, "logits/chosen": -3.5160298347473145, "logits/rejected": -2.789252281188965, "logps/chosen": -309.52423095703125, "logps/rejected": -162.95404052734375, "loss": 0.6461, "rewards/accuracies": 0.625, "rewards/chosen": -0.15706217288970947, "rewards/margins": 1.5007129907608032, "rewards/rejected": -1.6577751636505127, "step": 5367 }, { "epoch": 0.62, "learning_rate": 1.1609504857778298e-07, "logits/chosen": -3.1422603130340576, "logits/rejected": -2.9530131816864014, "logps/chosen": -138.58863830566406, "logps/rejected": -172.74046325683594, "loss": 0.3, "rewards/accuracies": 0.875, "rewards/chosen": 0.3287200331687927, "rewards/margins": 2.238513946533203, "rewards/rejected": -1.909793734550476, "step": 5368 }, { "epoch": 0.62, "learning_rate": 1.1605993210815872e-07, "logits/chosen": -2.3680989742279053, "logits/rejected": -2.3658642768859863, "logps/chosen": -331.8570251464844, "logps/rejected": -191.47860717773438, "loss": 0.459, "rewards/accuracies": 0.625, "rewards/chosen": -0.29823246598243713, "rewards/margins": 1.2475379705429077, "rewards/rejected": -1.545770525932312, "step": 5369 }, { "epoch": 0.62, "learning_rate": 1.1602481563853446e-07, "logits/chosen": -2.9775118827819824, "logits/rejected": -2.706105947494507, "logps/chosen": -183.79908752441406, "logps/rejected": -177.86422729492188, "loss": 0.3683, "rewards/accuracies": 0.75, "rewards/chosen": -0.19985660910606384, "rewards/margins": 1.490896463394165, "rewards/rejected": -1.6907532215118408, "step": 5370 }, { "epoch": 0.62, "learning_rate": 1.1598969916891022e-07, "logits/chosen": -3.2645456790924072, "logits/rejected": -3.371328353881836, "logps/chosen": -172.08164978027344, "logps/rejected": -193.70401000976562, "loss": 0.4403, "rewards/accuracies": 0.75, "rewards/chosen": 0.2348976582288742, "rewards/margins": 1.964170217514038, "rewards/rejected": -1.7292726039886475, "step": 5371 }, { "epoch": 0.62, "learning_rate": 1.1595458269928596e-07, "logits/chosen": -3.0191707611083984, "logits/rejected": -3.1529541015625, "logps/chosen": -142.548583984375, "logps/rejected": -225.72662353515625, "loss": 0.1109, "rewards/accuracies": 1.0, "rewards/chosen": 0.46746212244033813, "rewards/margins": 2.7991256713867188, "rewards/rejected": -2.3316633701324463, "step": 5372 }, { "epoch": 0.62, "learning_rate": 1.1591946622966171e-07, "logits/chosen": -2.8772644996643066, "logits/rejected": -2.7923009395599365, "logps/chosen": -398.38177490234375, "logps/rejected": -186.63479614257812, "loss": 0.2628, "rewards/accuracies": 0.875, "rewards/chosen": 0.616472601890564, "rewards/margins": 1.9396283626556396, "rewards/rejected": -1.3231558799743652, "step": 5373 }, { "epoch": 0.62, "learning_rate": 1.1588434976003745e-07, "logits/chosen": -3.531045436859131, "logits/rejected": -3.352996349334717, "logps/chosen": -327.17327880859375, "logps/rejected": -321.76068115234375, "loss": 0.4477, "rewards/accuracies": 0.625, "rewards/chosen": 0.14260751008987427, "rewards/margins": 1.86981999874115, "rewards/rejected": -1.7272124290466309, "step": 5374 }, { "epoch": 0.62, "learning_rate": 1.158492332904132e-07, "logits/chosen": -2.60917067527771, "logits/rejected": -2.534966468811035, "logps/chosen": -235.608642578125, "logps/rejected": -372.5523681640625, "loss": 0.6513, "rewards/accuracies": 0.75, "rewards/chosen": 0.1976119875907898, "rewards/margins": 1.8307408094406128, "rewards/rejected": -1.6331288814544678, "step": 5375 }, { "epoch": 0.62, "learning_rate": 1.1581411682078895e-07, "logits/chosen": -4.025502681732178, "logits/rejected": -3.471933364868164, "logps/chosen": -278.642822265625, "logps/rejected": -208.4122314453125, "loss": 0.8403, "rewards/accuracies": 0.75, "rewards/chosen": -1.4096136093139648, "rewards/margins": 1.4459164142608643, "rewards/rejected": -2.855530023574829, "step": 5376 }, { "epoch": 0.62, "learning_rate": 1.1577900035116469e-07, "logits/chosen": -2.206879138946533, "logits/rejected": -2.642988443374634, "logps/chosen": -269.44342041015625, "logps/rejected": -260.6522216796875, "loss": 0.3802, "rewards/accuracies": 0.75, "rewards/chosen": -0.3760431408882141, "rewards/margins": 1.7626538276672363, "rewards/rejected": -2.1386971473693848, "step": 5377 }, { "epoch": 0.62, "learning_rate": 1.1574388388154043e-07, "logits/chosen": -3.2079758644104004, "logits/rejected": -3.2585935592651367, "logps/chosen": -373.4296875, "logps/rejected": -374.28857421875, "loss": 0.5142, "rewards/accuracies": 0.625, "rewards/chosen": -1.1068644523620605, "rewards/margins": 0.6792547106742859, "rewards/rejected": -1.7861191034317017, "step": 5378 }, { "epoch": 0.62, "learning_rate": 1.157087674119162e-07, "logits/chosen": -2.5816521644592285, "logits/rejected": -2.7543318271636963, "logps/chosen": -286.53887939453125, "logps/rejected": -231.24273681640625, "loss": 0.2489, "rewards/accuracies": 0.875, "rewards/chosen": -0.22637930512428284, "rewards/margins": 2.225533962249756, "rewards/rejected": -2.451913356781006, "step": 5379 }, { "epoch": 0.62, "learning_rate": 1.1567365094229194e-07, "logits/chosen": -3.822096586227417, "logits/rejected": -3.7211849689483643, "logps/chosen": -374.24298095703125, "logps/rejected": -276.4981689453125, "loss": 0.4712, "rewards/accuracies": 0.75, "rewards/chosen": -0.19458897411823273, "rewards/margins": 1.0528664588928223, "rewards/rejected": -1.2474554777145386, "step": 5380 }, { "epoch": 0.62, "learning_rate": 1.1563853447266768e-07, "logits/chosen": -3.264277696609497, "logits/rejected": -3.530611991882324, "logps/chosen": -304.64068603515625, "logps/rejected": -331.2483825683594, "loss": 0.3091, "rewards/accuracies": 0.875, "rewards/chosen": 0.04678189754486084, "rewards/margins": 2.0136659145355225, "rewards/rejected": -1.9668841361999512, "step": 5381 }, { "epoch": 0.62, "learning_rate": 1.1560341800304342e-07, "logits/chosen": -3.364964723587036, "logits/rejected": -3.331735849380493, "logps/chosen": -269.4156494140625, "logps/rejected": -225.2075653076172, "loss": 0.2944, "rewards/accuracies": 0.875, "rewards/chosen": 0.06383839249610901, "rewards/margins": 2.0007333755493164, "rewards/rejected": -1.9368950128555298, "step": 5382 }, { "epoch": 0.62, "learning_rate": 1.1556830153341916e-07, "logits/chosen": -3.3190815448760986, "logits/rejected": -3.3456435203552246, "logps/chosen": -307.66278076171875, "logps/rejected": -259.479736328125, "loss": 0.5095, "rewards/accuracies": 0.625, "rewards/chosen": 0.1309036910533905, "rewards/margins": 1.546416997909546, "rewards/rejected": -1.415513277053833, "step": 5383 }, { "epoch": 0.62, "learning_rate": 1.1553318506379492e-07, "logits/chosen": -3.6437695026397705, "logits/rejected": -3.157029628753662, "logps/chosen": -284.73406982421875, "logps/rejected": -169.3768768310547, "loss": 0.5391, "rewards/accuracies": 0.625, "rewards/chosen": 0.34945350885391235, "rewards/margins": 1.341076374053955, "rewards/rejected": -0.9916229248046875, "step": 5384 }, { "epoch": 0.62, "learning_rate": 1.1549806859417066e-07, "logits/chosen": -3.7959232330322266, "logits/rejected": -3.4397354125976562, "logps/chosen": -277.3111877441406, "logps/rejected": -149.9631805419922, "loss": 0.3229, "rewards/accuracies": 0.875, "rewards/chosen": 0.048348478972911835, "rewards/margins": 1.3206932544708252, "rewards/rejected": -1.2723448276519775, "step": 5385 }, { "epoch": 0.62, "learning_rate": 1.154629521245464e-07, "logits/chosen": -3.120267152786255, "logits/rejected": -2.985335111618042, "logps/chosen": -420.81866455078125, "logps/rejected": -433.0013122558594, "loss": 0.1791, "rewards/accuracies": 1.0, "rewards/chosen": -0.21078455448150635, "rewards/margins": 3.5479841232299805, "rewards/rejected": -3.7587685585021973, "step": 5386 }, { "epoch": 0.62, "learning_rate": 1.1542783565492215e-07, "logits/chosen": -3.4664766788482666, "logits/rejected": -3.244804859161377, "logps/chosen": -160.06060791015625, "logps/rejected": -139.59918212890625, "loss": 0.5818, "rewards/accuracies": 0.625, "rewards/chosen": -0.6167044639587402, "rewards/margins": 0.6924952864646912, "rewards/rejected": -1.3091998100280762, "step": 5387 }, { "epoch": 0.62, "learning_rate": 1.153927191852979e-07, "logits/chosen": -2.6381890773773193, "logits/rejected": -2.712780237197876, "logps/chosen": -393.1153564453125, "logps/rejected": -367.4856872558594, "loss": 0.4411, "rewards/accuracies": 0.75, "rewards/chosen": 0.5455946922302246, "rewards/margins": 0.8453782796859741, "rewards/rejected": -0.2997836470603943, "step": 5388 }, { "epoch": 0.62, "learning_rate": 1.1535760271567364e-07, "logits/chosen": -2.6452341079711914, "logits/rejected": -2.7302184104919434, "logps/chosen": -408.2210998535156, "logps/rejected": -444.42034912109375, "loss": 0.8822, "rewards/accuracies": 0.5, "rewards/chosen": -0.42330971360206604, "rewards/margins": 0.8302160501480103, "rewards/rejected": -1.253525733947754, "step": 5389 }, { "epoch": 0.62, "learning_rate": 1.153224862460494e-07, "logits/chosen": -3.5458033084869385, "logits/rejected": -3.072679042816162, "logps/chosen": -263.4439392089844, "logps/rejected": -141.41001892089844, "loss": 0.3718, "rewards/accuracies": 0.875, "rewards/chosen": -0.4121408760547638, "rewards/margins": 1.2258577346801758, "rewards/rejected": -1.6379987001419067, "step": 5390 }, { "epoch": 0.62, "learning_rate": 1.1528736977642513e-07, "logits/chosen": -2.7225780487060547, "logits/rejected": -2.771023750305176, "logps/chosen": -201.1732635498047, "logps/rejected": -229.7334442138672, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": 0.051396504044532776, "rewards/margins": 3.471224546432495, "rewards/rejected": -3.419827938079834, "step": 5391 }, { "epoch": 0.62, "learning_rate": 1.1525225330680089e-07, "logits/chosen": -2.082326889038086, "logits/rejected": -2.0754449367523193, "logps/chosen": -495.4266357421875, "logps/rejected": -344.55792236328125, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": -0.2101336121559143, "rewards/margins": 2.1621766090393066, "rewards/rejected": -2.372310161590576, "step": 5392 }, { "epoch": 0.62, "learning_rate": 1.1521713683717663e-07, "logits/chosen": -2.6029341220855713, "logits/rejected": -2.7744336128234863, "logps/chosen": -293.91827392578125, "logps/rejected": -192.15835571289062, "loss": 0.3035, "rewards/accuracies": 0.875, "rewards/chosen": -0.06664273142814636, "rewards/margins": 2.1556262969970703, "rewards/rejected": -2.222269058227539, "step": 5393 }, { "epoch": 0.62, "learning_rate": 1.1518202036755237e-07, "logits/chosen": -3.458683729171753, "logits/rejected": -3.314277172088623, "logps/chosen": -298.3847961425781, "logps/rejected": -193.99102783203125, "loss": 0.4917, "rewards/accuracies": 0.625, "rewards/chosen": -0.4525948464870453, "rewards/margins": 1.4432734251022339, "rewards/rejected": -1.8958684206008911, "step": 5394 }, { "epoch": 0.62, "learning_rate": 1.1514690389792811e-07, "logits/chosen": -3.58998441696167, "logits/rejected": -3.6365833282470703, "logps/chosen": -210.98135375976562, "logps/rejected": -211.5127410888672, "loss": 0.5932, "rewards/accuracies": 0.75, "rewards/chosen": -0.38476985692977905, "rewards/margins": 1.6893210411071777, "rewards/rejected": -2.0740909576416016, "step": 5395 }, { "epoch": 0.62, "learning_rate": 1.1511178742830388e-07, "logits/chosen": -2.6051440238952637, "logits/rejected": -3.0333023071289062, "logps/chosen": -278.6952819824219, "logps/rejected": -159.6924591064453, "loss": 0.6043, "rewards/accuracies": 0.625, "rewards/chosen": -0.35068565607070923, "rewards/margins": 1.330560326576233, "rewards/rejected": -1.681246042251587, "step": 5396 }, { "epoch": 0.62, "learning_rate": 1.1507667095867962e-07, "logits/chosen": -3.63861083984375, "logits/rejected": -3.3933768272399902, "logps/chosen": -173.93887329101562, "logps/rejected": -209.50067138671875, "loss": 0.4215, "rewards/accuracies": 0.875, "rewards/chosen": 0.12903009355068207, "rewards/margins": 1.3305171728134155, "rewards/rejected": -1.2014870643615723, "step": 5397 }, { "epoch": 0.62, "learning_rate": 1.1504155448905536e-07, "logits/chosen": -3.0496575832366943, "logits/rejected": -3.16556715965271, "logps/chosen": -342.45849609375, "logps/rejected": -245.93946838378906, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": 0.640344500541687, "rewards/margins": 2.2235093116760254, "rewards/rejected": -1.5831648111343384, "step": 5398 }, { "epoch": 0.62, "learning_rate": 1.150064380194311e-07, "logits/chosen": -3.1905038356781006, "logits/rejected": -3.363406181335449, "logps/chosen": -283.7483215332031, "logps/rejected": -280.486328125, "loss": 0.4722, "rewards/accuracies": 0.75, "rewards/chosen": 0.008265011012554169, "rewards/margins": 1.7873954772949219, "rewards/rejected": -1.7791303396224976, "step": 5399 }, { "epoch": 0.62, "learning_rate": 1.1497132154980687e-07, "logits/chosen": -3.301525115966797, "logits/rejected": -3.8508386611938477, "logps/chosen": -193.31004333496094, "logps/rejected": -366.68011474609375, "loss": 0.619, "rewards/accuracies": 0.5, "rewards/chosen": -0.5141566395759583, "rewards/margins": 2.0829615592956543, "rewards/rejected": -2.597118377685547, "step": 5400 }, { "epoch": 0.62, "learning_rate": 1.1493620508018261e-07, "logits/chosen": -2.770860433578491, "logits/rejected": -2.551589012145996, "logps/chosen": -314.78253173828125, "logps/rejected": -422.1291198730469, "loss": 0.3633, "rewards/accuracies": 0.75, "rewards/chosen": -0.39257174730300903, "rewards/margins": 1.58377206325531, "rewards/rejected": -1.9763438701629639, "step": 5401 }, { "epoch": 0.62, "learning_rate": 1.1490108861055835e-07, "logits/chosen": -3.0580742359161377, "logits/rejected": -3.1320137977600098, "logps/chosen": -219.77944946289062, "logps/rejected": -139.26168823242188, "loss": 0.8642, "rewards/accuracies": 0.5, "rewards/chosen": -0.21715977787971497, "rewards/margins": 0.8461529612541199, "rewards/rejected": -1.0633126497268677, "step": 5402 }, { "epoch": 0.62, "learning_rate": 1.1486597214093409e-07, "logits/chosen": -3.369581937789917, "logits/rejected": -3.575672149658203, "logps/chosen": -125.51608276367188, "logps/rejected": -255.1027374267578, "loss": 0.3569, "rewards/accuracies": 0.75, "rewards/chosen": 0.43118661642074585, "rewards/margins": 2.2483248710632324, "rewards/rejected": -1.8171381950378418, "step": 5403 }, { "epoch": 0.62, "learning_rate": 1.1483085567130984e-07, "logits/chosen": -4.299713611602783, "logits/rejected": -3.7780957221984863, "logps/chosen": -456.3119201660156, "logps/rejected": -261.55059814453125, "loss": 0.3405, "rewards/accuracies": 0.75, "rewards/chosen": -0.18194884061813354, "rewards/margins": 1.8531893491744995, "rewards/rejected": -2.0351383686065674, "step": 5404 }, { "epoch": 0.62, "learning_rate": 1.1479573920168558e-07, "logits/chosen": -2.953155994415283, "logits/rejected": -3.108332633972168, "logps/chosen": -187.6568145751953, "logps/rejected": -411.9206237792969, "loss": 0.1787, "rewards/accuracies": 1.0, "rewards/chosen": 0.13976708054542542, "rewards/margins": 2.5863378047943115, "rewards/rejected": -2.446570634841919, "step": 5405 }, { "epoch": 0.62, "learning_rate": 1.1476062273206132e-07, "logits/chosen": -3.154958486557007, "logits/rejected": -2.8166441917419434, "logps/chosen": -263.3016357421875, "logps/rejected": -274.4705505371094, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": -0.13420172035694122, "rewards/margins": 3.1139426231384277, "rewards/rejected": -3.2481441497802734, "step": 5406 }, { "epoch": 0.62, "learning_rate": 1.1472550626243708e-07, "logits/chosen": -2.9040894508361816, "logits/rejected": -2.9279346466064453, "logps/chosen": -264.76708984375, "logps/rejected": -189.4815673828125, "loss": 0.3979, "rewards/accuracies": 0.75, "rewards/chosen": 0.12637919187545776, "rewards/margins": 2.0214807987213135, "rewards/rejected": -1.8951016664505005, "step": 5407 }, { "epoch": 0.62, "learning_rate": 1.1469038979281283e-07, "logits/chosen": -3.070676565170288, "logits/rejected": -2.966632604598999, "logps/chosen": -480.0934143066406, "logps/rejected": -333.8505554199219, "loss": 0.5298, "rewards/accuracies": 0.75, "rewards/chosen": -0.21648862957954407, "rewards/margins": 1.086332082748413, "rewards/rejected": -1.3028206825256348, "step": 5408 }, { "epoch": 0.62, "learning_rate": 1.1465527332318857e-07, "logits/chosen": -2.7101669311523438, "logits/rejected": -2.6957597732543945, "logps/chosen": -162.93040466308594, "logps/rejected": -255.97894287109375, "loss": 0.1754, "rewards/accuracies": 1.0, "rewards/chosen": 0.3285563886165619, "rewards/margins": 2.7797298431396484, "rewards/rejected": -2.4511733055114746, "step": 5409 }, { "epoch": 0.62, "learning_rate": 1.1462015685356431e-07, "logits/chosen": -2.925961494445801, "logits/rejected": -2.6428089141845703, "logps/chosen": -271.47564697265625, "logps/rejected": -222.16268920898438, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": -0.4405193030834198, "rewards/margins": 2.243133544921875, "rewards/rejected": -2.683652639389038, "step": 5410 }, { "epoch": 0.62, "learning_rate": 1.1458504038394005e-07, "logits/chosen": -3.0183939933776855, "logits/rejected": -3.2148308753967285, "logps/chosen": -131.65011596679688, "logps/rejected": -162.43545532226562, "loss": 0.324, "rewards/accuracies": 1.0, "rewards/chosen": -0.16675469279289246, "rewards/margins": 1.6619699001312256, "rewards/rejected": -1.8287245035171509, "step": 5411 }, { "epoch": 0.62, "learning_rate": 1.1454992391431582e-07, "logits/chosen": -2.8220229148864746, "logits/rejected": -3.366724967956543, "logps/chosen": -126.24058532714844, "logps/rejected": -144.64662170410156, "loss": 0.7044, "rewards/accuracies": 0.75, "rewards/chosen": -0.03696702420711517, "rewards/margins": 1.1342765092849731, "rewards/rejected": -1.17124342918396, "step": 5412 }, { "epoch": 0.62, "learning_rate": 1.1451480744469156e-07, "logits/chosen": -3.0047519207000732, "logits/rejected": -3.090839147567749, "logps/chosen": -176.64801025390625, "logps/rejected": -344.8848876953125, "loss": 0.6468, "rewards/accuracies": 0.625, "rewards/chosen": -0.29135653376579285, "rewards/margins": 2.4813733100891113, "rewards/rejected": -2.7727296352386475, "step": 5413 }, { "epoch": 0.62, "learning_rate": 1.144796909750673e-07, "logits/chosen": -2.925032615661621, "logits/rejected": -3.062242031097412, "logps/chosen": -321.96514892578125, "logps/rejected": -296.39019775390625, "loss": 0.4248, "rewards/accuracies": 0.875, "rewards/chosen": -0.22239811718463898, "rewards/margins": 1.4773352146148682, "rewards/rejected": -1.6997333765029907, "step": 5414 }, { "epoch": 0.62, "learning_rate": 1.1444457450544304e-07, "logits/chosen": -2.4675376415252686, "logits/rejected": -2.4189505577087402, "logps/chosen": -237.29827880859375, "logps/rejected": -231.3621063232422, "loss": 0.3968, "rewards/accuracies": 0.875, "rewards/chosen": -0.4489559531211853, "rewards/margins": 1.4412122964859009, "rewards/rejected": -1.8901681900024414, "step": 5415 }, { "epoch": 0.62, "learning_rate": 1.144094580358188e-07, "logits/chosen": -2.884814977645874, "logits/rejected": -2.778104543685913, "logps/chosen": -404.9049072265625, "logps/rejected": -200.90936279296875, "loss": 0.2806, "rewards/accuracies": 0.875, "rewards/chosen": 0.24723833799362183, "rewards/margins": 2.126283884048462, "rewards/rejected": -1.8790454864501953, "step": 5416 }, { "epoch": 0.62, "learning_rate": 1.1437434156619455e-07, "logits/chosen": -2.202568292617798, "logits/rejected": -2.464547634124756, "logps/chosen": -376.1035461425781, "logps/rejected": -354.1040344238281, "loss": 0.7762, "rewards/accuracies": 0.75, "rewards/chosen": -0.7168967723846436, "rewards/margins": 0.5304185152053833, "rewards/rejected": -1.2473154067993164, "step": 5417 }, { "epoch": 0.62, "learning_rate": 1.1433922509657029e-07, "logits/chosen": -3.5774011611938477, "logits/rejected": -4.000446796417236, "logps/chosen": -107.77477264404297, "logps/rejected": -282.89703369140625, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": 0.8016239404678345, "rewards/margins": 4.8462395668029785, "rewards/rejected": -4.044615745544434, "step": 5418 }, { "epoch": 0.62, "learning_rate": 1.1430410862694603e-07, "logits/chosen": -2.620431900024414, "logits/rejected": -3.0785717964172363, "logps/chosen": -175.94989013671875, "logps/rejected": -218.52227783203125, "loss": 0.4215, "rewards/accuracies": 0.75, "rewards/chosen": -0.032325759530067444, "rewards/margins": 1.2509998083114624, "rewards/rejected": -1.2833256721496582, "step": 5419 }, { "epoch": 0.62, "learning_rate": 1.1426899215732178e-07, "logits/chosen": -3.1671366691589355, "logits/rejected": -2.99135160446167, "logps/chosen": -185.8384552001953, "logps/rejected": -243.1055908203125, "loss": 0.3873, "rewards/accuracies": 0.75, "rewards/chosen": -0.0011806488037109375, "rewards/margins": 1.0082817077636719, "rewards/rejected": -1.0094622373580933, "step": 5420 }, { "epoch": 0.62, "learning_rate": 1.1423387568769753e-07, "logits/chosen": -2.63264536857605, "logits/rejected": -3.0324509143829346, "logps/chosen": -104.58489227294922, "logps/rejected": -185.0447998046875, "loss": 0.5335, "rewards/accuracies": 0.75, "rewards/chosen": -0.5698172450065613, "rewards/margins": 1.0728511810302734, "rewards/rejected": -1.6426682472229004, "step": 5421 }, { "epoch": 0.63, "learning_rate": 1.1419875921807327e-07, "logits/chosen": -2.9067025184631348, "logits/rejected": -2.6670165061950684, "logps/chosen": -208.9619140625, "logps/rejected": -238.3024139404297, "loss": 0.5933, "rewards/accuracies": 0.75, "rewards/chosen": -0.8235546350479126, "rewards/margins": 1.0167022943496704, "rewards/rejected": -1.8402568101882935, "step": 5422 }, { "epoch": 0.63, "learning_rate": 1.1416364274844901e-07, "logits/chosen": -2.978832960128784, "logits/rejected": -3.1148366928100586, "logps/chosen": -383.68096923828125, "logps/rejected": -390.9912109375, "loss": 0.6266, "rewards/accuracies": 0.75, "rewards/chosen": 0.26824572682380676, "rewards/margins": 2.4279966354370117, "rewards/rejected": -2.1597509384155273, "step": 5423 }, { "epoch": 0.63, "learning_rate": 1.1412852627882477e-07, "logits/chosen": -2.9072251319885254, "logits/rejected": -3.3236148357391357, "logps/chosen": -190.90957641601562, "logps/rejected": -256.1713562011719, "loss": 0.2069, "rewards/accuracies": 1.0, "rewards/chosen": 0.01433553360402584, "rewards/margins": 2.844028949737549, "rewards/rejected": -2.82969331741333, "step": 5424 }, { "epoch": 0.63, "learning_rate": 1.1409340980920051e-07, "logits/chosen": -3.109469413757324, "logits/rejected": -2.5608115196228027, "logps/chosen": -463.7767333984375, "logps/rejected": -420.4684753417969, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": 0.2778787612915039, "rewards/margins": 2.9256393909454346, "rewards/rejected": -2.6477603912353516, "step": 5425 }, { "epoch": 0.63, "learning_rate": 1.1405829333957626e-07, "logits/chosen": -3.141941785812378, "logits/rejected": -3.2391011714935303, "logps/chosen": -147.60377502441406, "logps/rejected": -264.9602966308594, "loss": 0.2218, "rewards/accuracies": 0.875, "rewards/chosen": -0.047821879386901855, "rewards/margins": 3.1390957832336426, "rewards/rejected": -3.186917543411255, "step": 5426 }, { "epoch": 0.63, "learning_rate": 1.14023176869952e-07, "logits/chosen": -3.6093525886535645, "logits/rejected": -3.3298633098602295, "logps/chosen": -204.36322021484375, "logps/rejected": -151.15078735351562, "loss": 0.5657, "rewards/accuracies": 0.625, "rewards/chosen": -0.5423285961151123, "rewards/margins": 0.8560734391212463, "rewards/rejected": -1.3984020948410034, "step": 5427 }, { "epoch": 0.63, "learning_rate": 1.1398806040032776e-07, "logits/chosen": -3.6177897453308105, "logits/rejected": -3.276336669921875, "logps/chosen": -217.2994842529297, "logps/rejected": -246.02566528320312, "loss": 0.2333, "rewards/accuracies": 0.875, "rewards/chosen": -0.15047326683998108, "rewards/margins": 2.269132614135742, "rewards/rejected": -2.4196062088012695, "step": 5428 }, { "epoch": 0.63, "learning_rate": 1.139529439307035e-07, "logits/chosen": -3.082894802093506, "logits/rejected": -3.460049629211426, "logps/chosen": -281.1061706542969, "logps/rejected": -295.80780029296875, "loss": 0.2662, "rewards/accuracies": 1.0, "rewards/chosen": -0.12408819794654846, "rewards/margins": 3.4467334747314453, "rewards/rejected": -3.570821762084961, "step": 5429 }, { "epoch": 0.63, "learning_rate": 1.1391782746107924e-07, "logits/chosen": -2.802238702774048, "logits/rejected": -2.550931930541992, "logps/chosen": -416.0614318847656, "logps/rejected": -267.0864562988281, "loss": 0.6602, "rewards/accuracies": 0.625, "rewards/chosen": -0.12466681003570557, "rewards/margins": 1.1529356241226196, "rewards/rejected": -1.2776024341583252, "step": 5430 }, { "epoch": 0.63, "learning_rate": 1.1388271099145498e-07, "logits/chosen": -2.8321495056152344, "logits/rejected": -3.1490602493286133, "logps/chosen": -149.52276611328125, "logps/rejected": -232.69137573242188, "loss": 0.2792, "rewards/accuracies": 0.875, "rewards/chosen": -0.11203056573867798, "rewards/margins": 1.9958739280700684, "rewards/rejected": -2.1079044342041016, "step": 5431 }, { "epoch": 0.63, "learning_rate": 1.1384759452183073e-07, "logits/chosen": -3.296407461166382, "logits/rejected": -2.870182991027832, "logps/chosen": -305.19390869140625, "logps/rejected": -218.38131713867188, "loss": 0.2697, "rewards/accuracies": 0.75, "rewards/chosen": 0.5059475302696228, "rewards/margins": 2.417079448699951, "rewards/rejected": -1.9111319780349731, "step": 5432 }, { "epoch": 0.63, "learning_rate": 1.1381247805220648e-07, "logits/chosen": -2.7658138275146484, "logits/rejected": -2.8423752784729004, "logps/chosen": -271.23333740234375, "logps/rejected": -478.18267822265625, "loss": 0.4862, "rewards/accuracies": 0.75, "rewards/chosen": -0.16587768495082855, "rewards/margins": 1.790069580078125, "rewards/rejected": -1.9559471607208252, "step": 5433 }, { "epoch": 0.63, "learning_rate": 1.1377736158258223e-07, "logits/chosen": -2.8306634426116943, "logits/rejected": -2.794461965560913, "logps/chosen": -261.24853515625, "logps/rejected": -193.36582946777344, "loss": 0.5264, "rewards/accuracies": 0.625, "rewards/chosen": -0.5096862316131592, "rewards/margins": 1.4253833293914795, "rewards/rejected": -1.9350695610046387, "step": 5434 }, { "epoch": 0.63, "learning_rate": 1.1374224511295797e-07, "logits/chosen": -2.6660594940185547, "logits/rejected": -3.0235042572021484, "logps/chosen": -109.76969909667969, "logps/rejected": -319.69097900390625, "loss": 0.0851, "rewards/accuracies": 1.0, "rewards/chosen": 0.34447622299194336, "rewards/margins": 3.847911834716797, "rewards/rejected": -3.5034356117248535, "step": 5435 }, { "epoch": 0.63, "learning_rate": 1.1370712864333371e-07, "logits/chosen": -3.0998990535736084, "logits/rejected": -3.336967945098877, "logps/chosen": -414.9439392089844, "logps/rejected": -232.19619750976562, "loss": 0.354, "rewards/accuracies": 0.875, "rewards/chosen": -0.45807647705078125, "rewards/margins": 1.7607529163360596, "rewards/rejected": -2.21882963180542, "step": 5436 }, { "epoch": 0.63, "learning_rate": 1.1367201217370947e-07, "logits/chosen": -3.728574275970459, "logits/rejected": -3.5528323650360107, "logps/chosen": -287.1342468261719, "logps/rejected": -238.96002197265625, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": 0.38072076439857483, "rewards/margins": 3.321661949157715, "rewards/rejected": -2.9409408569335938, "step": 5437 }, { "epoch": 0.63, "learning_rate": 1.1363689570408521e-07, "logits/chosen": -3.526336193084717, "logits/rejected": -3.264965057373047, "logps/chosen": -345.6750793457031, "logps/rejected": -116.9435806274414, "loss": 0.7122, "rewards/accuracies": 0.5, "rewards/chosen": 0.15803363919258118, "rewards/margins": 0.8013694286346436, "rewards/rejected": -0.6433357000350952, "step": 5438 }, { "epoch": 0.63, "learning_rate": 1.1360177923446095e-07, "logits/chosen": -3.0087521076202393, "logits/rejected": -2.824171543121338, "logps/chosen": -223.07186889648438, "logps/rejected": -213.9716339111328, "loss": 0.426, "rewards/accuracies": 0.875, "rewards/chosen": -0.37828201055526733, "rewards/margins": 1.7054604291915894, "rewards/rejected": -2.083742380142212, "step": 5439 }, { "epoch": 0.63, "learning_rate": 1.1356666276483669e-07, "logits/chosen": -3.228221893310547, "logits/rejected": -2.894627809524536, "logps/chosen": -296.3581848144531, "logps/rejected": -286.0030822753906, "loss": 0.5976, "rewards/accuracies": 0.75, "rewards/chosen": -0.3554047644138336, "rewards/margins": 1.2417845726013184, "rewards/rejected": -1.5971894264221191, "step": 5440 }, { "epoch": 0.63, "learning_rate": 1.1353154629521246e-07, "logits/chosen": -3.2218034267425537, "logits/rejected": -2.8591294288635254, "logps/chosen": -285.0145263671875, "logps/rejected": -274.23272705078125, "loss": 0.8723, "rewards/accuracies": 0.5, "rewards/chosen": -0.8490760326385498, "rewards/margins": 0.13216370344161987, "rewards/rejected": -0.9812397956848145, "step": 5441 }, { "epoch": 0.63, "learning_rate": 1.134964298255882e-07, "logits/chosen": -4.100832462310791, "logits/rejected": -3.80203914642334, "logps/chosen": -235.61962890625, "logps/rejected": -227.03167724609375, "loss": 0.3129, "rewards/accuracies": 0.875, "rewards/chosen": 0.26808854937553406, "rewards/margins": 1.6245558261871338, "rewards/rejected": -1.3564671277999878, "step": 5442 }, { "epoch": 0.63, "learning_rate": 1.1346131335596394e-07, "logits/chosen": -3.4097819328308105, "logits/rejected": -3.295990228652954, "logps/chosen": -211.964599609375, "logps/rejected": -180.66329956054688, "loss": 0.4157, "rewards/accuracies": 0.75, "rewards/chosen": -0.5080854892730713, "rewards/margins": 1.5150783061981201, "rewards/rejected": -2.0231637954711914, "step": 5443 }, { "epoch": 0.63, "learning_rate": 1.1342619688633968e-07, "logits/chosen": -3.1121020317077637, "logits/rejected": -3.2889585494995117, "logps/chosen": -207.82928466796875, "logps/rejected": -289.00762939453125, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": -0.14625754952430725, "rewards/margins": 4.511458396911621, "rewards/rejected": -4.657716274261475, "step": 5444 }, { "epoch": 0.63, "learning_rate": 1.1339108041671545e-07, "logits/chosen": -3.490854263305664, "logits/rejected": -3.321404457092285, "logps/chosen": -238.72340393066406, "logps/rejected": -205.87388610839844, "loss": 0.234, "rewards/accuracies": 0.875, "rewards/chosen": 0.3354250192642212, "rewards/margins": 1.9860202074050903, "rewards/rejected": -1.6505951881408691, "step": 5445 }, { "epoch": 0.63, "learning_rate": 1.1335596394709119e-07, "logits/chosen": -3.2389137744903564, "logits/rejected": -3.1520161628723145, "logps/chosen": -253.183349609375, "logps/rejected": -242.78567504882812, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": -0.3328303396701813, "rewards/margins": 1.6772754192352295, "rewards/rejected": -2.010105609893799, "step": 5446 }, { "epoch": 0.63, "learning_rate": 1.1332084747746693e-07, "logits/chosen": -2.9628305435180664, "logits/rejected": -2.890165328979492, "logps/chosen": -212.53640747070312, "logps/rejected": -182.61257934570312, "loss": 0.5047, "rewards/accuracies": 0.75, "rewards/chosen": 0.37939080595970154, "rewards/margins": 0.7861454486846924, "rewards/rejected": -0.406754732131958, "step": 5447 }, { "epoch": 0.63, "learning_rate": 1.1328573100784267e-07, "logits/chosen": -3.116598606109619, "logits/rejected": -2.9375576972961426, "logps/chosen": -337.99609375, "logps/rejected": -235.3089599609375, "loss": 0.5024, "rewards/accuracies": 0.875, "rewards/chosen": -0.39569348096847534, "rewards/margins": 2.57637095451355, "rewards/rejected": -2.972064971923828, "step": 5448 }, { "epoch": 0.63, "learning_rate": 1.1325061453821842e-07, "logits/chosen": -2.991877555847168, "logits/rejected": -3.1429474353790283, "logps/chosen": -371.9602355957031, "logps/rejected": -249.68043518066406, "loss": 0.4805, "rewards/accuracies": 0.875, "rewards/chosen": -0.26950603723526, "rewards/margins": 1.5438337326049805, "rewards/rejected": -1.8133398294448853, "step": 5449 }, { "epoch": 0.63, "learning_rate": 1.1321549806859416e-07, "logits/chosen": -2.960064649581909, "logits/rejected": -3.080139636993408, "logps/chosen": -203.80941772460938, "logps/rejected": -315.38623046875, "loss": 0.2668, "rewards/accuracies": 1.0, "rewards/chosen": 0.12071201205253601, "rewards/margins": 2.563567638397217, "rewards/rejected": -2.4428555965423584, "step": 5450 }, { "epoch": 0.63, "learning_rate": 1.1318038159896992e-07, "logits/chosen": -2.9071130752563477, "logits/rejected": -2.765225648880005, "logps/chosen": -289.81298828125, "logps/rejected": -280.88726806640625, "loss": 0.5503, "rewards/accuracies": 0.375, "rewards/chosen": 0.07644108682870865, "rewards/margins": 1.2755508422851562, "rewards/rejected": -1.1991097927093506, "step": 5451 }, { "epoch": 0.63, "learning_rate": 1.1314526512934566e-07, "logits/chosen": -3.750321388244629, "logits/rejected": -3.59281587600708, "logps/chosen": -242.33827209472656, "logps/rejected": -119.08585357666016, "loss": 0.1899, "rewards/accuracies": 0.875, "rewards/chosen": 1.0153803825378418, "rewards/margins": 2.235485553741455, "rewards/rejected": -1.2201051712036133, "step": 5452 }, { "epoch": 0.63, "learning_rate": 1.1311014865972141e-07, "logits/chosen": -3.1659464836120605, "logits/rejected": -3.2590179443359375, "logps/chosen": -545.3598022460938, "logps/rejected": -289.70953369140625, "loss": 0.486, "rewards/accuracies": 0.75, "rewards/chosen": -0.21787193417549133, "rewards/margins": 1.8483103513717651, "rewards/rejected": -2.0661821365356445, "step": 5453 }, { "epoch": 0.63, "learning_rate": 1.1307503219009715e-07, "logits/chosen": -2.451460838317871, "logits/rejected": -2.4063472747802734, "logps/chosen": -170.24411010742188, "logps/rejected": -192.6683807373047, "loss": 0.4421, "rewards/accuracies": 0.75, "rewards/chosen": -0.10424965620040894, "rewards/margins": 1.3747406005859375, "rewards/rejected": -1.4789901971817017, "step": 5454 }, { "epoch": 0.63, "learning_rate": 1.1303991572047289e-07, "logits/chosen": -3.2338876724243164, "logits/rejected": -3.245044231414795, "logps/chosen": -241.0927276611328, "logps/rejected": -271.0386047363281, "loss": 0.7426, "rewards/accuracies": 0.625, "rewards/chosen": -0.34863513708114624, "rewards/margins": 2.761258363723755, "rewards/rejected": -3.109893798828125, "step": 5455 }, { "epoch": 0.63, "learning_rate": 1.1300479925084863e-07, "logits/chosen": -2.910937547683716, "logits/rejected": -2.9161999225616455, "logps/chosen": -244.00106811523438, "logps/rejected": -340.05047607421875, "loss": 0.2815, "rewards/accuracies": 0.75, "rewards/chosen": -0.4220176339149475, "rewards/margins": 2.5856494903564453, "rewards/rejected": -3.007667303085327, "step": 5456 }, { "epoch": 0.63, "learning_rate": 1.129696827812244e-07, "logits/chosen": -3.194343090057373, "logits/rejected": -2.9654996395111084, "logps/chosen": -308.35601806640625, "logps/rejected": -175.7947998046875, "loss": 0.3087, "rewards/accuracies": 0.875, "rewards/chosen": 0.1461782604455948, "rewards/margins": 1.2838854789733887, "rewards/rejected": -1.1377073526382446, "step": 5457 }, { "epoch": 0.63, "learning_rate": 1.1293456631160014e-07, "logits/chosen": -2.6091010570526123, "logits/rejected": -2.7087013721466064, "logps/chosen": -405.87109375, "logps/rejected": -340.6890869140625, "loss": 0.2325, "rewards/accuracies": 0.75, "rewards/chosen": 0.2965024709701538, "rewards/margins": 2.8282132148742676, "rewards/rejected": -2.531710624694824, "step": 5458 }, { "epoch": 0.63, "learning_rate": 1.1289944984197588e-07, "logits/chosen": -2.9300050735473633, "logits/rejected": -3.0163733959198, "logps/chosen": -152.17926025390625, "logps/rejected": -238.68359375, "loss": 0.6028, "rewards/accuracies": 0.5, "rewards/chosen": -0.2626815736293793, "rewards/margins": 1.0172045230865479, "rewards/rejected": -1.2798861265182495, "step": 5459 }, { "epoch": 0.63, "learning_rate": 1.1286433337235162e-07, "logits/chosen": -2.729191541671753, "logits/rejected": -2.871738910675049, "logps/chosen": -249.9425048828125, "logps/rejected": -265.055908203125, "loss": 0.8353, "rewards/accuracies": 0.625, "rewards/chosen": -0.11828944832086563, "rewards/margins": 0.9151426553726196, "rewards/rejected": -1.033432126045227, "step": 5460 }, { "epoch": 0.63, "learning_rate": 1.1282921690272738e-07, "logits/chosen": -3.222806930541992, "logits/rejected": -3.399954319000244, "logps/chosen": -201.75534057617188, "logps/rejected": -236.1642303466797, "loss": 0.7093, "rewards/accuracies": 0.75, "rewards/chosen": -0.4881255030632019, "rewards/margins": 0.8929263949394226, "rewards/rejected": -1.3810518980026245, "step": 5461 }, { "epoch": 0.63, "learning_rate": 1.1279410043310313e-07, "logits/chosen": -2.8979408740997314, "logits/rejected": -3.125361919403076, "logps/chosen": -322.6529541015625, "logps/rejected": -300.826171875, "loss": 0.4069, "rewards/accuracies": 0.875, "rewards/chosen": -0.31613582372665405, "rewards/margins": 1.1837774515151978, "rewards/rejected": -1.499913215637207, "step": 5462 }, { "epoch": 0.63, "learning_rate": 1.1275898396347887e-07, "logits/chosen": -2.647444248199463, "logits/rejected": -2.7109642028808594, "logps/chosen": -319.679931640625, "logps/rejected": -317.677001953125, "loss": 0.468, "rewards/accuracies": 0.75, "rewards/chosen": -0.19447019696235657, "rewards/margins": 1.4868981838226318, "rewards/rejected": -1.681368350982666, "step": 5463 }, { "epoch": 0.63, "learning_rate": 1.1272386749385461e-07, "logits/chosen": -3.595109701156616, "logits/rejected": -3.2960875034332275, "logps/chosen": -279.94647216796875, "logps/rejected": -192.41842651367188, "loss": 0.2299, "rewards/accuracies": 1.0, "rewards/chosen": 0.04962420463562012, "rewards/margins": 1.8095622062683105, "rewards/rejected": -1.7599380016326904, "step": 5464 }, { "epoch": 0.63, "learning_rate": 1.1268875102423036e-07, "logits/chosen": -2.961522102355957, "logits/rejected": -2.8921151161193848, "logps/chosen": -245.94371032714844, "logps/rejected": -164.75112915039062, "loss": 0.5996, "rewards/accuracies": 0.625, "rewards/chosen": 0.20202457904815674, "rewards/margins": 1.1998381614685059, "rewards/rejected": -0.9978134632110596, "step": 5465 }, { "epoch": 0.63, "learning_rate": 1.126536345546061e-07, "logits/chosen": -3.589834690093994, "logits/rejected": -3.3231847286224365, "logps/chosen": -217.22613525390625, "logps/rejected": -249.88613891601562, "loss": 0.2193, "rewards/accuracies": 1.0, "rewards/chosen": 0.24851754307746887, "rewards/margins": 2.4803733825683594, "rewards/rejected": -2.231855869293213, "step": 5466 }, { "epoch": 0.63, "learning_rate": 1.1261851808498185e-07, "logits/chosen": -2.4813520908355713, "logits/rejected": -2.3675708770751953, "logps/chosen": -316.1728210449219, "logps/rejected": -301.47607421875, "loss": 0.2915, "rewards/accuracies": 0.875, "rewards/chosen": -0.036406297236680984, "rewards/margins": 2.8549630641937256, "rewards/rejected": -2.891369342803955, "step": 5467 }, { "epoch": 0.63, "learning_rate": 1.1258340161535759e-07, "logits/chosen": -2.8529398441314697, "logits/rejected": -2.5666921138763428, "logps/chosen": -290.71295166015625, "logps/rejected": -201.75439453125, "loss": 0.4166, "rewards/accuracies": 0.875, "rewards/chosen": -0.013438880443572998, "rewards/margins": 1.3818333148956299, "rewards/rejected": -1.3952722549438477, "step": 5468 }, { "epoch": 0.63, "learning_rate": 1.1254828514573335e-07, "logits/chosen": -3.5478787422180176, "logits/rejected": -3.6383392810821533, "logps/chosen": -285.2642822265625, "logps/rejected": -231.12814331054688, "loss": 0.5834, "rewards/accuracies": 0.625, "rewards/chosen": -1.1902227401733398, "rewards/margins": 0.7500325441360474, "rewards/rejected": -1.9402554035186768, "step": 5469 }, { "epoch": 0.63, "learning_rate": 1.125131686761091e-07, "logits/chosen": -3.1576550006866455, "logits/rejected": -2.8569412231445312, "logps/chosen": -133.49795532226562, "logps/rejected": -266.6334533691406, "loss": 0.2957, "rewards/accuracies": 0.875, "rewards/chosen": -0.5071113109588623, "rewards/margins": 2.5961060523986816, "rewards/rejected": -3.103217124938965, "step": 5470 }, { "epoch": 0.63, "learning_rate": 1.1247805220648483e-07, "logits/chosen": -2.686901569366455, "logits/rejected": -2.7907938957214355, "logps/chosen": -396.9640808105469, "logps/rejected": -359.2188720703125, "loss": 0.4158, "rewards/accuracies": 0.75, "rewards/chosen": -0.11026449501514435, "rewards/margins": 1.0140016078948975, "rewards/rejected": -1.1242660284042358, "step": 5471 }, { "epoch": 0.63, "learning_rate": 1.1244293573686058e-07, "logits/chosen": -2.5132343769073486, "logits/rejected": -2.519845485687256, "logps/chosen": -133.67977905273438, "logps/rejected": -251.34068298339844, "loss": 0.5071, "rewards/accuracies": 0.625, "rewards/chosen": 0.022243410348892212, "rewards/margins": 1.8780794143676758, "rewards/rejected": -1.8558359146118164, "step": 5472 }, { "epoch": 0.63, "learning_rate": 1.1240781926723634e-07, "logits/chosen": -2.7375595569610596, "logits/rejected": -2.8225605487823486, "logps/chosen": -311.9199523925781, "logps/rejected": -179.55682373046875, "loss": 0.5972, "rewards/accuracies": 0.625, "rewards/chosen": -0.5273056030273438, "rewards/margins": 0.3976099491119385, "rewards/rejected": -0.9249155521392822, "step": 5473 }, { "epoch": 0.63, "learning_rate": 1.1237270279761208e-07, "logits/chosen": -2.752690315246582, "logits/rejected": -3.0547902584075928, "logps/chosen": -289.3938293457031, "logps/rejected": -252.96621704101562, "loss": 0.3935, "rewards/accuracies": 0.75, "rewards/chosen": 0.1708342432975769, "rewards/margins": 1.4646451473236084, "rewards/rejected": -1.2938108444213867, "step": 5474 }, { "epoch": 0.63, "learning_rate": 1.1233758632798782e-07, "logits/chosen": -2.8565263748168945, "logits/rejected": -2.9993391036987305, "logps/chosen": -414.2953796386719, "logps/rejected": -222.9341583251953, "loss": 0.3523, "rewards/accuracies": 0.75, "rewards/chosen": -0.026893839240074158, "rewards/margins": 1.9569569826126099, "rewards/rejected": -1.9838507175445557, "step": 5475 }, { "epoch": 0.63, "learning_rate": 1.1230246985836356e-07, "logits/chosen": -3.0080456733703613, "logits/rejected": -2.7970476150512695, "logps/chosen": -155.52613830566406, "logps/rejected": -107.7236328125, "loss": 0.6354, "rewards/accuracies": 0.5, "rewards/chosen": -0.35474684834480286, "rewards/margins": 0.37442582845687866, "rewards/rejected": -0.7291726469993591, "step": 5476 }, { "epoch": 0.63, "learning_rate": 1.122673533887393e-07, "logits/chosen": -3.6482114791870117, "logits/rejected": -3.4958415031433105, "logps/chosen": -235.63941955566406, "logps/rejected": -230.5106201171875, "loss": 0.9269, "rewards/accuracies": 0.75, "rewards/chosen": 0.08770638704299927, "rewards/margins": 1.9582582712173462, "rewards/rejected": -1.8705518245697021, "step": 5477 }, { "epoch": 0.63, "learning_rate": 1.1223223691911506e-07, "logits/chosen": -2.789910078048706, "logits/rejected": -2.3334789276123047, "logps/chosen": -131.17750549316406, "logps/rejected": -449.4748229980469, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": 0.027030013501644135, "rewards/margins": 3.3140439987182617, "rewards/rejected": -3.2870140075683594, "step": 5478 }, { "epoch": 0.63, "learning_rate": 1.1219712044949081e-07, "logits/chosen": -3.126075506210327, "logits/rejected": -2.8245368003845215, "logps/chosen": -234.1618194580078, "logps/rejected": -258.89892578125, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": 0.6773309111595154, "rewards/margins": 3.2553558349609375, "rewards/rejected": -2.5780248641967773, "step": 5479 }, { "epoch": 0.63, "learning_rate": 1.1216200397986655e-07, "logits/chosen": -2.8615169525146484, "logits/rejected": -2.5520660877227783, "logps/chosen": -224.00656127929688, "logps/rejected": -268.4437255859375, "loss": 0.2738, "rewards/accuracies": 1.0, "rewards/chosen": 0.3212311863899231, "rewards/margins": 1.435390830039978, "rewards/rejected": -1.1141595840454102, "step": 5480 }, { "epoch": 0.63, "learning_rate": 1.121268875102423e-07, "logits/chosen": -3.1347765922546387, "logits/rejected": -3.1705238819122314, "logps/chosen": -357.8116760253906, "logps/rejected": -292.01727294921875, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": -0.31670644879341125, "rewards/margins": 2.730114698410034, "rewards/rejected": -3.046821355819702, "step": 5481 }, { "epoch": 0.63, "learning_rate": 1.1209177104061805e-07, "logits/chosen": -2.2741241455078125, "logits/rejected": -2.504934310913086, "logps/chosen": -339.88153076171875, "logps/rejected": -273.9819030761719, "loss": 0.1556, "rewards/accuracies": 1.0, "rewards/chosen": 1.1699130535125732, "rewards/margins": 3.0218961238861084, "rewards/rejected": -1.8519833087921143, "step": 5482 }, { "epoch": 0.63, "learning_rate": 1.1205665457099379e-07, "logits/chosen": -3.467454671859741, "logits/rejected": -3.2936248779296875, "logps/chosen": -334.1725158691406, "logps/rejected": -244.14776611328125, "loss": 0.4266, "rewards/accuracies": 0.875, "rewards/chosen": -0.02453860640525818, "rewards/margins": 1.6729447841644287, "rewards/rejected": -1.6974833011627197, "step": 5483 }, { "epoch": 0.63, "learning_rate": 1.1202153810136953e-07, "logits/chosen": -3.3341166973114014, "logits/rejected": -3.0226941108703613, "logps/chosen": -377.73419189453125, "logps/rejected": -231.8177947998047, "loss": 0.4177, "rewards/accuracies": 0.75, "rewards/chosen": 0.024500221014022827, "rewards/margins": 1.2430144548416138, "rewards/rejected": -1.2185142040252686, "step": 5484 }, { "epoch": 0.63, "learning_rate": 1.1198642163174527e-07, "logits/chosen": -3.0285165309906006, "logits/rejected": -2.7580361366271973, "logps/chosen": -436.3427429199219, "logps/rejected": -353.1871337890625, "loss": 0.2927, "rewards/accuracies": 0.75, "rewards/chosen": 0.6433258056640625, "rewards/margins": 2.3950695991516113, "rewards/rejected": -1.7517437934875488, "step": 5485 }, { "epoch": 0.63, "learning_rate": 1.1195130516212104e-07, "logits/chosen": -3.093221426010132, "logits/rejected": -3.1678154468536377, "logps/chosen": -337.2660827636719, "logps/rejected": -369.57666015625, "loss": 0.8245, "rewards/accuracies": 0.5, "rewards/chosen": -0.500946044921875, "rewards/margins": 0.6713109612464905, "rewards/rejected": -1.1722568273544312, "step": 5486 }, { "epoch": 0.63, "learning_rate": 1.1191618869249678e-07, "logits/chosen": -2.9627132415771484, "logits/rejected": -3.2397356033325195, "logps/chosen": -189.01760864257812, "logps/rejected": -379.73077392578125, "loss": 0.1948, "rewards/accuracies": 1.0, "rewards/chosen": 0.263182669878006, "rewards/margins": 4.189352989196777, "rewards/rejected": -3.9261703491210938, "step": 5487 }, { "epoch": 0.63, "learning_rate": 1.1188107222287252e-07, "logits/chosen": -2.622494697570801, "logits/rejected": -2.824293851852417, "logps/chosen": -202.5093231201172, "logps/rejected": -236.33265686035156, "loss": 0.4966, "rewards/accuracies": 0.75, "rewards/chosen": -0.5236482620239258, "rewards/margins": 1.5963563919067383, "rewards/rejected": -2.120004653930664, "step": 5488 }, { "epoch": 0.63, "learning_rate": 1.1184595575324826e-07, "logits/chosen": -3.165426254272461, "logits/rejected": -3.333479404449463, "logps/chosen": -175.8100128173828, "logps/rejected": -325.0626220703125, "loss": 0.3701, "rewards/accuracies": 0.875, "rewards/chosen": -0.04856446385383606, "rewards/margins": 1.4174546003341675, "rewards/rejected": -1.4660191535949707, "step": 5489 }, { "epoch": 0.63, "learning_rate": 1.1181083928362403e-07, "logits/chosen": -3.4511826038360596, "logits/rejected": -3.328671932220459, "logps/chosen": -200.65191650390625, "logps/rejected": -185.54669189453125, "loss": 0.2928, "rewards/accuracies": 0.875, "rewards/chosen": -0.3867371082305908, "rewards/margins": 2.0300190448760986, "rewards/rejected": -2.4167561531066895, "step": 5490 }, { "epoch": 0.63, "learning_rate": 1.1177572281399977e-07, "logits/chosen": -3.573338508605957, "logits/rejected": -3.4713687896728516, "logps/chosen": -449.6094665527344, "logps/rejected": -394.2887878417969, "loss": 0.1549, "rewards/accuracies": 1.0, "rewards/chosen": 0.7046130895614624, "rewards/margins": 2.857367515563965, "rewards/rejected": -2.152754306793213, "step": 5491 }, { "epoch": 0.63, "learning_rate": 1.117406063443755e-07, "logits/chosen": -3.0129055976867676, "logits/rejected": -3.2479326725006104, "logps/chosen": -319.97259521484375, "logps/rejected": -261.19915771484375, "loss": 0.4255, "rewards/accuracies": 0.75, "rewards/chosen": 0.02799205482006073, "rewards/margins": 1.9949949979782104, "rewards/rejected": -1.9670027494430542, "step": 5492 }, { "epoch": 0.63, "learning_rate": 1.1170548987475125e-07, "logits/chosen": -3.35306453704834, "logits/rejected": -3.1737165451049805, "logps/chosen": -296.0086669921875, "logps/rejected": -210.87933349609375, "loss": 0.4761, "rewards/accuracies": 0.625, "rewards/chosen": -0.019969038665294647, "rewards/margins": 0.8099215626716614, "rewards/rejected": -0.8298905491828918, "step": 5493 }, { "epoch": 0.63, "learning_rate": 1.11670373405127e-07, "logits/chosen": -3.1186866760253906, "logits/rejected": -3.2538411617279053, "logps/chosen": -208.56736755371094, "logps/rejected": -252.6584014892578, "loss": 0.3489, "rewards/accuracies": 0.875, "rewards/chosen": 0.25513797998428345, "rewards/margins": 1.531419038772583, "rewards/rejected": -1.2762811183929443, "step": 5494 }, { "epoch": 0.63, "learning_rate": 1.1163525693550274e-07, "logits/chosen": -3.1133861541748047, "logits/rejected": -2.958989143371582, "logps/chosen": -224.08558654785156, "logps/rejected": -247.2935791015625, "loss": 0.258, "rewards/accuracies": 0.875, "rewards/chosen": 0.28028395771980286, "rewards/margins": 2.087554931640625, "rewards/rejected": -1.807271122932434, "step": 5495 }, { "epoch": 0.63, "learning_rate": 1.116001404658785e-07, "logits/chosen": -3.2378695011138916, "logits/rejected": -3.3117024898529053, "logps/chosen": -294.3490295410156, "logps/rejected": -199.15228271484375, "loss": 0.3782, "rewards/accuracies": 0.75, "rewards/chosen": -0.3890104591846466, "rewards/margins": 0.9755556583404541, "rewards/rejected": -1.3645660877227783, "step": 5496 }, { "epoch": 0.63, "learning_rate": 1.1156502399625424e-07, "logits/chosen": -3.28847074508667, "logits/rejected": -3.117636203765869, "logps/chosen": -276.21246337890625, "logps/rejected": -194.04251098632812, "loss": 0.5365, "rewards/accuracies": 0.625, "rewards/chosen": -0.05329914391040802, "rewards/margins": 0.7173891067504883, "rewards/rejected": -0.7706882953643799, "step": 5497 }, { "epoch": 0.63, "learning_rate": 1.1152990752662999e-07, "logits/chosen": -3.267228126525879, "logits/rejected": -3.4802141189575195, "logps/chosen": -183.12152099609375, "logps/rejected": -243.7259979248047, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": 0.4132990837097168, "rewards/margins": 2.1349406242370605, "rewards/rejected": -1.7216416597366333, "step": 5498 }, { "epoch": 0.63, "learning_rate": 1.1149479105700573e-07, "logits/chosen": -3.1874938011169434, "logits/rejected": -2.705043077468872, "logps/chosen": -197.82650756835938, "logps/rejected": -176.7633514404297, "loss": 0.5871, "rewards/accuracies": 0.75, "rewards/chosen": -0.4271780252456665, "rewards/margins": 0.9877208471298218, "rewards/rejected": -1.4148989915847778, "step": 5499 }, { "epoch": 0.63, "learning_rate": 1.1145967458738147e-07, "logits/chosen": -3.4292654991149902, "logits/rejected": -3.6989030838012695, "logps/chosen": -213.780029296875, "logps/rejected": -333.119873046875, "loss": 0.5713, "rewards/accuracies": 0.75, "rewards/chosen": -0.49021971225738525, "rewards/margins": 1.1087383031845093, "rewards/rejected": -1.598958134651184, "step": 5500 }, { "epoch": 0.63, "learning_rate": 1.1142455811775721e-07, "logits/chosen": -3.675584316253662, "logits/rejected": -3.445375919342041, "logps/chosen": -159.32354736328125, "logps/rejected": -151.65151977539062, "loss": 0.4831, "rewards/accuracies": 0.875, "rewards/chosen": 0.037028104066848755, "rewards/margins": 0.8467313647270203, "rewards/rejected": -0.8097033500671387, "step": 5501 }, { "epoch": 0.63, "learning_rate": 1.1138944164813298e-07, "logits/chosen": -2.632962226867676, "logits/rejected": -2.6882009506225586, "logps/chosen": -167.53314208984375, "logps/rejected": -232.84783935546875, "loss": 0.3084, "rewards/accuracies": 0.875, "rewards/chosen": -0.17690376937389374, "rewards/margins": 2.4795751571655273, "rewards/rejected": -2.6564788818359375, "step": 5502 }, { "epoch": 0.63, "learning_rate": 1.1135432517850872e-07, "logits/chosen": -2.5546200275421143, "logits/rejected": -2.5025768280029297, "logps/chosen": -349.92431640625, "logps/rejected": -327.72802734375, "loss": 0.4806, "rewards/accuracies": 0.75, "rewards/chosen": -0.4213631749153137, "rewards/margins": 1.9757661819458008, "rewards/rejected": -2.397129535675049, "step": 5503 }, { "epoch": 0.63, "learning_rate": 1.1131920870888446e-07, "logits/chosen": -2.577798843383789, "logits/rejected": -2.746854066848755, "logps/chosen": -183.76918029785156, "logps/rejected": -207.2682342529297, "loss": 0.4892, "rewards/accuracies": 0.875, "rewards/chosen": -0.3552616834640503, "rewards/margins": 0.9401463866233826, "rewards/rejected": -1.2954081296920776, "step": 5504 }, { "epoch": 0.63, "learning_rate": 1.112840922392602e-07, "logits/chosen": -3.2074503898620605, "logits/rejected": -3.1832382678985596, "logps/chosen": -285.6666564941406, "logps/rejected": -241.51795959472656, "loss": 0.2973, "rewards/accuracies": 0.75, "rewards/chosen": -0.175079807639122, "rewards/margins": 3.100816249847412, "rewards/rejected": -3.275895833969116, "step": 5505 }, { "epoch": 0.63, "learning_rate": 1.1124897576963595e-07, "logits/chosen": -3.2883517742156982, "logits/rejected": -3.1571602821350098, "logps/chosen": -219.53919982910156, "logps/rejected": -305.21295166015625, "loss": 0.231, "rewards/accuracies": 0.875, "rewards/chosen": -0.4753226339817047, "rewards/margins": 2.1593844890594482, "rewards/rejected": -2.634706974029541, "step": 5506 }, { "epoch": 0.63, "learning_rate": 1.1121385930001171e-07, "logits/chosen": -3.3992414474487305, "logits/rejected": -3.351496696472168, "logps/chosen": -295.09014892578125, "logps/rejected": -160.6492919921875, "loss": 0.4097, "rewards/accuracies": 0.875, "rewards/chosen": -0.23790279030799866, "rewards/margins": 2.086582899093628, "rewards/rejected": -2.3244857788085938, "step": 5507 }, { "epoch": 0.63, "learning_rate": 1.1117874283038745e-07, "logits/chosen": -2.9079818725585938, "logits/rejected": -3.1298067569732666, "logps/chosen": -392.1679992675781, "logps/rejected": -333.46246337890625, "loss": 0.7285, "rewards/accuracies": 0.625, "rewards/chosen": -0.1759652942419052, "rewards/margins": 2.1347365379333496, "rewards/rejected": -2.310701608657837, "step": 5508 }, { "epoch": 0.64, "learning_rate": 1.1114362636076319e-07, "logits/chosen": -2.898730993270874, "logits/rejected": -2.7392079830169678, "logps/chosen": -273.8663635253906, "logps/rejected": -279.561767578125, "loss": 0.3804, "rewards/accuracies": 0.875, "rewards/chosen": -0.1743800789117813, "rewards/margins": 1.2793071269989014, "rewards/rejected": -1.4536871910095215, "step": 5509 }, { "epoch": 0.64, "learning_rate": 1.1110850989113894e-07, "logits/chosen": -3.3898472785949707, "logits/rejected": -3.0971789360046387, "logps/chosen": -145.29794311523438, "logps/rejected": -105.98072814941406, "loss": 0.7936, "rewards/accuracies": 0.625, "rewards/chosen": -0.5425716638565063, "rewards/margins": 0.16475412249565125, "rewards/rejected": -0.70732581615448, "step": 5510 }, { "epoch": 0.64, "learning_rate": 1.1107339342151468e-07, "logits/chosen": -3.1593804359436035, "logits/rejected": -3.061960458755493, "logps/chosen": -186.45339965820312, "logps/rejected": -199.9506072998047, "loss": 0.3424, "rewards/accuracies": 0.875, "rewards/chosen": 0.07303686439990997, "rewards/margins": 1.3414933681488037, "rewards/rejected": -1.2684564590454102, "step": 5511 }, { "epoch": 0.64, "learning_rate": 1.1103827695189042e-07, "logits/chosen": -3.246035575866699, "logits/rejected": -2.9584169387817383, "logps/chosen": -186.2506103515625, "logps/rejected": -213.4463348388672, "loss": 0.5063, "rewards/accuracies": 0.625, "rewards/chosen": -0.08119082450866699, "rewards/margins": 1.144974708557129, "rewards/rejected": -1.226165533065796, "step": 5512 }, { "epoch": 0.64, "learning_rate": 1.1100316048226618e-07, "logits/chosen": -3.345869541168213, "logits/rejected": -2.9399325847625732, "logps/chosen": -266.8641357421875, "logps/rejected": -268.50604248046875, "loss": 0.1933, "rewards/accuracies": 0.875, "rewards/chosen": 0.05347121134400368, "rewards/margins": 2.6958200931549072, "rewards/rejected": -2.6423490047454834, "step": 5513 }, { "epoch": 0.64, "learning_rate": 1.1096804401264193e-07, "logits/chosen": -3.7046170234680176, "logits/rejected": -3.8143067359924316, "logps/chosen": -225.65672302246094, "logps/rejected": -219.10513305664062, "loss": 0.3799, "rewards/accuracies": 0.625, "rewards/chosen": -0.09888547658920288, "rewards/margins": 1.8516778945922852, "rewards/rejected": -1.9505633115768433, "step": 5514 }, { "epoch": 0.64, "learning_rate": 1.1093292754301767e-07, "logits/chosen": -3.517336368560791, "logits/rejected": -3.5595571994781494, "logps/chosen": -167.04100036621094, "logps/rejected": -172.13621520996094, "loss": 0.592, "rewards/accuracies": 0.625, "rewards/chosen": 0.0002405792474746704, "rewards/margins": 1.1323533058166504, "rewards/rejected": -1.132112741470337, "step": 5515 }, { "epoch": 0.64, "learning_rate": 1.1089781107339341e-07, "logits/chosen": -3.27402663230896, "logits/rejected": -3.2704644203186035, "logps/chosen": -187.83917236328125, "logps/rejected": -196.63037109375, "loss": 0.1935, "rewards/accuracies": 0.875, "rewards/chosen": -0.00946022942662239, "rewards/margins": 2.1864519119262695, "rewards/rejected": -2.1959123611450195, "step": 5516 }, { "epoch": 0.64, "learning_rate": 1.1086269460376915e-07, "logits/chosen": -2.6184921264648438, "logits/rejected": -2.687542200088501, "logps/chosen": -133.64236450195312, "logps/rejected": -264.16558837890625, "loss": 0.1079, "rewards/accuracies": 1.0, "rewards/chosen": -0.22215205430984497, "rewards/margins": 3.58726167678833, "rewards/rejected": -3.8094139099121094, "step": 5517 }, { "epoch": 0.64, "learning_rate": 1.1082757813414492e-07, "logits/chosen": -3.6257870197296143, "logits/rejected": -3.62660551071167, "logps/chosen": -224.2109375, "logps/rejected": -225.47927856445312, "loss": 0.1755, "rewards/accuracies": 1.0, "rewards/chosen": 0.022235482931137085, "rewards/margins": 2.357403039932251, "rewards/rejected": -2.335167407989502, "step": 5518 }, { "epoch": 0.64, "learning_rate": 1.1079246166452066e-07, "logits/chosen": -3.2906100749969482, "logits/rejected": -3.007499933242798, "logps/chosen": -369.81866455078125, "logps/rejected": -219.42481994628906, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": 0.30697324872016907, "rewards/margins": 1.4028041362762451, "rewards/rejected": -1.0958307981491089, "step": 5519 }, { "epoch": 0.64, "learning_rate": 1.107573451948964e-07, "logits/chosen": -2.570202350616455, "logits/rejected": -2.583059787750244, "logps/chosen": -510.99200439453125, "logps/rejected": -262.88958740234375, "loss": 0.487, "rewards/accuracies": 0.75, "rewards/chosen": -0.01783415675163269, "rewards/margins": 2.1254565715789795, "rewards/rejected": -2.1432905197143555, "step": 5520 }, { "epoch": 0.64, "learning_rate": 1.1072222872527214e-07, "logits/chosen": -3.207482099533081, "logits/rejected": -2.9200170040130615, "logps/chosen": -234.81971740722656, "logps/rejected": -192.84413146972656, "loss": 0.3051, "rewards/accuracies": 1.0, "rewards/chosen": 0.3027254045009613, "rewards/margins": 1.2280296087265015, "rewards/rejected": -0.9253041744232178, "step": 5521 }, { "epoch": 0.64, "learning_rate": 1.106871122556479e-07, "logits/chosen": -2.925812244415283, "logits/rejected": -3.4228882789611816, "logps/chosen": -200.66900634765625, "logps/rejected": -242.32943725585938, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": 0.20015142858028412, "rewards/margins": 2.682528018951416, "rewards/rejected": -2.48237681388855, "step": 5522 }, { "epoch": 0.64, "learning_rate": 1.1065199578602364e-07, "logits/chosen": -3.066866159439087, "logits/rejected": -3.4221444129943848, "logps/chosen": -241.11843872070312, "logps/rejected": -175.12295532226562, "loss": 0.3941, "rewards/accuracies": 0.75, "rewards/chosen": 0.12313181161880493, "rewards/margins": 2.260941982269287, "rewards/rejected": -2.137809991836548, "step": 5523 }, { "epoch": 0.64, "learning_rate": 1.1061687931639939e-07, "logits/chosen": -2.6994361877441406, "logits/rejected": -2.991220712661743, "logps/chosen": -252.54864501953125, "logps/rejected": -307.52001953125, "loss": 0.3453, "rewards/accuracies": 0.875, "rewards/chosen": -0.1569393128156662, "rewards/margins": 2.6649858951568604, "rewards/rejected": -2.821925401687622, "step": 5524 }, { "epoch": 0.64, "learning_rate": 1.1058176284677513e-07, "logits/chosen": -2.7881903648376465, "logits/rejected": -3.095353603363037, "logps/chosen": -174.21531677246094, "logps/rejected": -354.4920959472656, "loss": 0.4865, "rewards/accuracies": 0.875, "rewards/chosen": 0.03687632083892822, "rewards/margins": 1.4258685111999512, "rewards/rejected": -1.3889920711517334, "step": 5525 }, { "epoch": 0.64, "learning_rate": 1.1054664637715087e-07, "logits/chosen": -3.7372450828552246, "logits/rejected": -3.546281337738037, "logps/chosen": -200.64089965820312, "logps/rejected": -160.68017578125, "loss": 0.6084, "rewards/accuracies": 0.5, "rewards/chosen": -0.246497243642807, "rewards/margins": 0.6750624775886536, "rewards/rejected": -0.9215598106384277, "step": 5526 }, { "epoch": 0.64, "learning_rate": 1.1051152990752663e-07, "logits/chosen": -2.849851131439209, "logits/rejected": -3.046604871749878, "logps/chosen": -292.5186462402344, "logps/rejected": -327.0318298339844, "loss": 0.2668, "rewards/accuracies": 1.0, "rewards/chosen": 0.17079542577266693, "rewards/margins": 1.8089994192123413, "rewards/rejected": -1.6382039785385132, "step": 5527 }, { "epoch": 0.64, "learning_rate": 1.1047641343790237e-07, "logits/chosen": -3.5392556190490723, "logits/rejected": -4.038215637207031, "logps/chosen": -140.36862182617188, "logps/rejected": -206.01290893554688, "loss": 0.2809, "rewards/accuracies": 0.875, "rewards/chosen": -0.33839088678359985, "rewards/margins": 1.8980073928833008, "rewards/rejected": -2.236398220062256, "step": 5528 }, { "epoch": 0.64, "learning_rate": 1.1044129696827811e-07, "logits/chosen": -3.5254416465759277, "logits/rejected": -3.6530165672302246, "logps/chosen": -180.14093017578125, "logps/rejected": -188.54705810546875, "loss": 0.2533, "rewards/accuracies": 0.875, "rewards/chosen": 0.12502899765968323, "rewards/margins": 2.089557647705078, "rewards/rejected": -1.9645286798477173, "step": 5529 }, { "epoch": 0.64, "learning_rate": 1.1040618049865386e-07, "logits/chosen": -2.4629435539245605, "logits/rejected": -2.5604774951934814, "logps/chosen": -399.15191650390625, "logps/rejected": -222.55772399902344, "loss": 0.2493, "rewards/accuracies": 0.875, "rewards/chosen": 0.11885417252779007, "rewards/margins": 2.4662117958068848, "rewards/rejected": -2.3473575115203857, "step": 5530 }, { "epoch": 0.64, "learning_rate": 1.1037106402902962e-07, "logits/chosen": -3.161863327026367, "logits/rejected": -3.0353612899780273, "logps/chosen": -283.15850830078125, "logps/rejected": -319.2021484375, "loss": 0.5942, "rewards/accuracies": 0.625, "rewards/chosen": -0.42695289850234985, "rewards/margins": 1.6866494417190552, "rewards/rejected": -2.11360239982605, "step": 5531 }, { "epoch": 0.64, "learning_rate": 1.1033594755940536e-07, "logits/chosen": -3.0062460899353027, "logits/rejected": -2.817401647567749, "logps/chosen": -346.0284423828125, "logps/rejected": -325.31866455078125, "loss": 0.5783, "rewards/accuracies": 0.75, "rewards/chosen": -0.22136089205741882, "rewards/margins": 0.5394108295440674, "rewards/rejected": -0.7607717514038086, "step": 5532 }, { "epoch": 0.64, "learning_rate": 1.103008310897811e-07, "logits/chosen": -3.115601062774658, "logits/rejected": -2.8772406578063965, "logps/chosen": -266.0494384765625, "logps/rejected": -293.56298828125, "loss": 0.619, "rewards/accuracies": 0.75, "rewards/chosen": -0.6358642578125, "rewards/margins": 0.9450751543045044, "rewards/rejected": -1.5809394121170044, "step": 5533 }, { "epoch": 0.64, "learning_rate": 1.1026571462015684e-07, "logits/chosen": -3.480530023574829, "logits/rejected": -3.662848949432373, "logps/chosen": -117.86821746826172, "logps/rejected": -189.725341796875, "loss": 0.4649, "rewards/accuracies": 0.75, "rewards/chosen": -0.004321090877056122, "rewards/margins": 2.006197214126587, "rewards/rejected": -2.0105185508728027, "step": 5534 }, { "epoch": 0.64, "learning_rate": 1.102305981505326e-07, "logits/chosen": -3.716085195541382, "logits/rejected": -3.654440402984619, "logps/chosen": -220.44017028808594, "logps/rejected": -286.96490478515625, "loss": 0.3354, "rewards/accuracies": 0.875, "rewards/chosen": 0.4073021113872528, "rewards/margins": 2.4694888591766357, "rewards/rejected": -2.0621869564056396, "step": 5535 }, { "epoch": 0.64, "learning_rate": 1.1019548168090835e-07, "logits/chosen": -2.8730549812316895, "logits/rejected": -3.0500383377075195, "logps/chosen": -349.35247802734375, "logps/rejected": -264.81915283203125, "loss": 0.4437, "rewards/accuracies": 0.875, "rewards/chosen": 0.4813695549964905, "rewards/margins": 2.3494057655334473, "rewards/rejected": -1.8680362701416016, "step": 5536 }, { "epoch": 0.64, "learning_rate": 1.1016036521128409e-07, "logits/chosen": -3.096554756164551, "logits/rejected": -3.180095911026001, "logps/chosen": -172.13525390625, "logps/rejected": -198.8572235107422, "loss": 0.6499, "rewards/accuracies": 0.625, "rewards/chosen": -0.5295023918151855, "rewards/margins": 0.6526376008987427, "rewards/rejected": -1.1821398735046387, "step": 5537 }, { "epoch": 0.64, "learning_rate": 1.1012524874165983e-07, "logits/chosen": -3.1378531455993652, "logits/rejected": -3.0356433391571045, "logps/chosen": -148.58685302734375, "logps/rejected": -188.75653076171875, "loss": 0.229, "rewards/accuracies": 1.0, "rewards/chosen": 0.3977190852165222, "rewards/margins": 2.714970588684082, "rewards/rejected": -2.317251443862915, "step": 5538 }, { "epoch": 0.64, "learning_rate": 1.1009013227203558e-07, "logits/chosen": -3.0705313682556152, "logits/rejected": -3.3714499473571777, "logps/chosen": -193.19554138183594, "logps/rejected": -220.8395538330078, "loss": 0.4075, "rewards/accuracies": 0.625, "rewards/chosen": -0.0800720751285553, "rewards/margins": 2.1725826263427734, "rewards/rejected": -2.252654552459717, "step": 5539 }, { "epoch": 0.64, "learning_rate": 1.1005501580241132e-07, "logits/chosen": -2.7951762676239014, "logits/rejected": -2.848287582397461, "logps/chosen": -365.0451354980469, "logps/rejected": -252.99771118164062, "loss": 0.3297, "rewards/accuracies": 0.875, "rewards/chosen": -0.03073069453239441, "rewards/margins": 1.3673338890075684, "rewards/rejected": -1.3980646133422852, "step": 5540 }, { "epoch": 0.64, "learning_rate": 1.1001989933278707e-07, "logits/chosen": -3.3802058696746826, "logits/rejected": -3.7321338653564453, "logps/chosen": -208.88134765625, "logps/rejected": -333.64434814453125, "loss": 0.378, "rewards/accuracies": 0.75, "rewards/chosen": -0.2732570767402649, "rewards/margins": 2.0252866744995117, "rewards/rejected": -2.298543691635132, "step": 5541 }, { "epoch": 0.64, "learning_rate": 1.0998478286316282e-07, "logits/chosen": -3.1483664512634277, "logits/rejected": -3.3012075424194336, "logps/chosen": -291.90533447265625, "logps/rejected": -316.12078857421875, "loss": 1.0098, "rewards/accuracies": 0.5, "rewards/chosen": -0.6501993536949158, "rewards/margins": -0.13786569237709045, "rewards/rejected": -0.5123336911201477, "step": 5542 }, { "epoch": 0.64, "learning_rate": 1.0994966639353857e-07, "logits/chosen": -3.832737445831299, "logits/rejected": -3.5804250240325928, "logps/chosen": -241.39788818359375, "logps/rejected": -238.79476928710938, "loss": 0.4502, "rewards/accuracies": 0.75, "rewards/chosen": -0.6003098487854004, "rewards/margins": 1.7608039379119873, "rewards/rejected": -2.361114025115967, "step": 5543 }, { "epoch": 0.64, "learning_rate": 1.0991454992391431e-07, "logits/chosen": -3.0383307933807373, "logits/rejected": -2.8511760234832764, "logps/chosen": -178.01217651367188, "logps/rejected": -200.23818969726562, "loss": 0.5132, "rewards/accuracies": 0.875, "rewards/chosen": -0.5584123730659485, "rewards/margins": 0.9145832657814026, "rewards/rejected": -1.472995638847351, "step": 5544 }, { "epoch": 0.64, "learning_rate": 1.0987943345429005e-07, "logits/chosen": -3.308229923248291, "logits/rejected": -3.42641544342041, "logps/chosen": -204.2576141357422, "logps/rejected": -284.22882080078125, "loss": 0.7145, "rewards/accuracies": 0.625, "rewards/chosen": -0.951913595199585, "rewards/margins": 1.8994026184082031, "rewards/rejected": -2.851316452026367, "step": 5545 }, { "epoch": 0.64, "learning_rate": 1.0984431698466579e-07, "logits/chosen": -2.736722469329834, "logits/rejected": -2.847074031829834, "logps/chosen": -212.12921142578125, "logps/rejected": -300.3636474609375, "loss": 0.4496, "rewards/accuracies": 0.875, "rewards/chosen": -0.4790023863315582, "rewards/margins": 0.8247756958007812, "rewards/rejected": -1.3037781715393066, "step": 5546 }, { "epoch": 0.64, "learning_rate": 1.0980920051504156e-07, "logits/chosen": -2.9862406253814697, "logits/rejected": -2.9840121269226074, "logps/chosen": -308.8196716308594, "logps/rejected": -266.7723693847656, "loss": 0.2281, "rewards/accuracies": 1.0, "rewards/chosen": 0.05951756238937378, "rewards/margins": 1.5760716199874878, "rewards/rejected": -1.5165541172027588, "step": 5547 }, { "epoch": 0.64, "learning_rate": 1.097740840454173e-07, "logits/chosen": -3.0478577613830566, "logits/rejected": -3.2702858448028564, "logps/chosen": -160.64198303222656, "logps/rejected": -237.85113525390625, "loss": 0.6475, "rewards/accuracies": 0.75, "rewards/chosen": -0.5977204442024231, "rewards/margins": 1.1510095596313477, "rewards/rejected": -1.748729944229126, "step": 5548 }, { "epoch": 0.64, "learning_rate": 1.0973896757579304e-07, "logits/chosen": -3.814485788345337, "logits/rejected": -3.588376760482788, "logps/chosen": -143.7447509765625, "logps/rejected": -278.7994384765625, "loss": 0.1919, "rewards/accuracies": 0.875, "rewards/chosen": -0.10172916948795319, "rewards/margins": 2.988987922668457, "rewards/rejected": -3.090717077255249, "step": 5549 }, { "epoch": 0.64, "learning_rate": 1.0970385110616878e-07, "logits/chosen": -3.194347858428955, "logits/rejected": -3.0685272216796875, "logps/chosen": -243.16021728515625, "logps/rejected": -215.71728515625, "loss": 0.3894, "rewards/accuracies": 0.875, "rewards/chosen": -0.19392141699790955, "rewards/margins": 1.0382965803146362, "rewards/rejected": -1.2322180271148682, "step": 5550 }, { "epoch": 0.64, "learning_rate": 1.0966873463654455e-07, "logits/chosen": -3.2503294944763184, "logits/rejected": -2.9025073051452637, "logps/chosen": -331.2242126464844, "logps/rejected": -341.3046569824219, "loss": 0.6131, "rewards/accuracies": 0.5, "rewards/chosen": -1.1706608533859253, "rewards/margins": 0.782849133014679, "rewards/rejected": -1.95350980758667, "step": 5551 }, { "epoch": 0.64, "learning_rate": 1.0963361816692029e-07, "logits/chosen": -3.2189316749572754, "logits/rejected": -2.8718652725219727, "logps/chosen": -416.78631591796875, "logps/rejected": -246.37713623046875, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": -0.07266174256801605, "rewards/margins": 2.642735242843628, "rewards/rejected": -2.7153968811035156, "step": 5552 }, { "epoch": 0.64, "learning_rate": 1.0959850169729603e-07, "logits/chosen": -3.575732707977295, "logits/rejected": -3.4063620567321777, "logps/chosen": -327.4098205566406, "logps/rejected": -285.12481689453125, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": 0.45916301012039185, "rewards/margins": 2.975454092025757, "rewards/rejected": -2.5162911415100098, "step": 5553 }, { "epoch": 0.64, "learning_rate": 1.0956338522767177e-07, "logits/chosen": -3.0119707584381104, "logits/rejected": -3.1084108352661133, "logps/chosen": -223.31387329101562, "logps/rejected": -291.1942443847656, "loss": 0.5748, "rewards/accuracies": 0.625, "rewards/chosen": -0.3868541717529297, "rewards/margins": 1.4171669483184814, "rewards/rejected": -1.8040211200714111, "step": 5554 }, { "epoch": 0.64, "learning_rate": 1.0952826875804752e-07, "logits/chosen": -3.3900554180145264, "logits/rejected": -3.0866732597351074, "logps/chosen": -321.4344177246094, "logps/rejected": -292.443603515625, "loss": 0.3488, "rewards/accuracies": 0.75, "rewards/chosen": -0.33955633640289307, "rewards/margins": 1.567571759223938, "rewards/rejected": -1.907128095626831, "step": 5555 }, { "epoch": 0.64, "learning_rate": 1.0949315228842326e-07, "logits/chosen": -2.999690055847168, "logits/rejected": -2.804152250289917, "logps/chosen": -256.8074645996094, "logps/rejected": -204.11727905273438, "loss": 0.3829, "rewards/accuracies": 0.875, "rewards/chosen": -0.17519989609718323, "rewards/margins": 1.1310919523239136, "rewards/rejected": -1.3062918186187744, "step": 5556 }, { "epoch": 0.64, "learning_rate": 1.09458035818799e-07, "logits/chosen": -2.6042256355285645, "logits/rejected": -2.86977219581604, "logps/chosen": -183.22239685058594, "logps/rejected": -197.10342407226562, "loss": 0.4679, "rewards/accuracies": 0.75, "rewards/chosen": 0.07626602798700333, "rewards/margins": 1.3117234706878662, "rewards/rejected": -1.235457420349121, "step": 5557 }, { "epoch": 0.64, "learning_rate": 1.0942291934917476e-07, "logits/chosen": -3.276357650756836, "logits/rejected": -3.391195774078369, "logps/chosen": -337.4051818847656, "logps/rejected": -303.994140625, "loss": 0.481, "rewards/accuracies": 0.75, "rewards/chosen": -0.2578878104686737, "rewards/margins": 1.165971040725708, "rewards/rejected": -1.4238587617874146, "step": 5558 }, { "epoch": 0.64, "learning_rate": 1.0938780287955051e-07, "logits/chosen": -3.352724552154541, "logits/rejected": -3.3108954429626465, "logps/chosen": -390.0657958984375, "logps/rejected": -295.61553955078125, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": 0.5092910528182983, "rewards/margins": 3.520625352859497, "rewards/rejected": -3.011334180831909, "step": 5559 }, { "epoch": 0.64, "learning_rate": 1.0935268640992625e-07, "logits/chosen": -3.1597836017608643, "logits/rejected": -3.287008285522461, "logps/chosen": -120.779296875, "logps/rejected": -241.67713928222656, "loss": 0.3208, "rewards/accuracies": 1.0, "rewards/chosen": 0.09977317601442337, "rewards/margins": 2.0206854343414307, "rewards/rejected": -1.9209123849868774, "step": 5560 }, { "epoch": 0.64, "learning_rate": 1.0931756994030199e-07, "logits/chosen": -2.970947504043579, "logits/rejected": -2.5185866355895996, "logps/chosen": -386.9381103515625, "logps/rejected": -235.0163116455078, "loss": 0.1764, "rewards/accuracies": 0.875, "rewards/chosen": 0.9169564247131348, "rewards/margins": 2.645935297012329, "rewards/rejected": -1.7289788722991943, "step": 5561 }, { "epoch": 0.64, "learning_rate": 1.0928245347067773e-07, "logits/chosen": -3.4842095375061035, "logits/rejected": -3.3589704036712646, "logps/chosen": -341.6241149902344, "logps/rejected": -378.51959228515625, "loss": 0.3594, "rewards/accuracies": 0.875, "rewards/chosen": -0.03753170371055603, "rewards/margins": 1.4058305025100708, "rewards/rejected": -1.4433623552322388, "step": 5562 }, { "epoch": 0.64, "learning_rate": 1.092473370010535e-07, "logits/chosen": -3.228346109390259, "logits/rejected": -3.049185276031494, "logps/chosen": -271.5245361328125, "logps/rejected": -276.07611083984375, "loss": 0.5067, "rewards/accuracies": 0.625, "rewards/chosen": 0.019838716834783554, "rewards/margins": 1.2017756700515747, "rewards/rejected": -1.1819369792938232, "step": 5563 }, { "epoch": 0.64, "learning_rate": 1.0921222053142924e-07, "logits/chosen": -3.4402101039886475, "logits/rejected": -3.1711840629577637, "logps/chosen": -384.1895751953125, "logps/rejected": -361.6351013183594, "loss": 0.9413, "rewards/accuracies": 0.625, "rewards/chosen": -0.775615930557251, "rewards/margins": 0.20605912804603577, "rewards/rejected": -0.9816750884056091, "step": 5564 }, { "epoch": 0.64, "learning_rate": 1.0917710406180498e-07, "logits/chosen": -2.81465744972229, "logits/rejected": -2.9696295261383057, "logps/chosen": -302.9178771972656, "logps/rejected": -244.16351318359375, "loss": 0.4386, "rewards/accuracies": 0.625, "rewards/chosen": -0.8099825382232666, "rewards/margins": 1.150705337524414, "rewards/rejected": -1.9606878757476807, "step": 5565 }, { "epoch": 0.64, "learning_rate": 1.0914198759218072e-07, "logits/chosen": -3.4526453018188477, "logits/rejected": -3.2858376502990723, "logps/chosen": -252.64450073242188, "logps/rejected": -304.3641052246094, "loss": 0.5275, "rewards/accuracies": 0.75, "rewards/chosen": -1.2996978759765625, "rewards/margins": 0.9118590950965881, "rewards/rejected": -2.211556911468506, "step": 5566 }, { "epoch": 0.64, "learning_rate": 1.0910687112255648e-07, "logits/chosen": -3.6942102909088135, "logits/rejected": -3.684047222137451, "logps/chosen": -263.5755615234375, "logps/rejected": -296.6995544433594, "loss": 0.5639, "rewards/accuracies": 0.625, "rewards/chosen": -0.05185417830944061, "rewards/margins": 0.8827250003814697, "rewards/rejected": -0.9345791935920715, "step": 5567 }, { "epoch": 0.64, "learning_rate": 1.0907175465293223e-07, "logits/chosen": -4.03670597076416, "logits/rejected": -3.77561354637146, "logps/chosen": -146.99713134765625, "logps/rejected": -147.85215759277344, "loss": 0.5525, "rewards/accuracies": 0.625, "rewards/chosen": -0.2682439684867859, "rewards/margins": 1.5591005086898804, "rewards/rejected": -1.827344536781311, "step": 5568 }, { "epoch": 0.64, "learning_rate": 1.0903663818330797e-07, "logits/chosen": -3.3505730628967285, "logits/rejected": -3.831346035003662, "logps/chosen": -109.84162139892578, "logps/rejected": -224.5396728515625, "loss": 0.3552, "rewards/accuracies": 0.625, "rewards/chosen": -0.17383822798728943, "rewards/margins": 2.12558913230896, "rewards/rejected": -2.299427032470703, "step": 5569 }, { "epoch": 0.64, "learning_rate": 1.0900152171368371e-07, "logits/chosen": -3.6277406215667725, "logits/rejected": -4.161089897155762, "logps/chosen": -261.3705139160156, "logps/rejected": -379.6358642578125, "loss": 0.4551, "rewards/accuracies": 0.75, "rewards/chosen": -0.12518097460269928, "rewards/margins": 1.8030238151550293, "rewards/rejected": -1.9282046556472778, "step": 5570 }, { "epoch": 0.64, "learning_rate": 1.0896640524405947e-07, "logits/chosen": -3.1875648498535156, "logits/rejected": -2.9674971103668213, "logps/chosen": -204.2544708251953, "logps/rejected": -239.86083984375, "loss": 0.5764, "rewards/accuracies": 0.5, "rewards/chosen": -0.5869861841201782, "rewards/margins": 1.5737985372543335, "rewards/rejected": -2.1607847213745117, "step": 5571 }, { "epoch": 0.64, "learning_rate": 1.089312887744352e-07, "logits/chosen": -3.281020402908325, "logits/rejected": -3.0123519897460938, "logps/chosen": -242.91847229003906, "logps/rejected": -158.44375610351562, "loss": 0.223, "rewards/accuracies": 0.875, "rewards/chosen": 0.06888549029827118, "rewards/margins": 1.9734975099563599, "rewards/rejected": -1.9046120643615723, "step": 5572 }, { "epoch": 0.64, "learning_rate": 1.0889617230481095e-07, "logits/chosen": -2.486341714859009, "logits/rejected": -2.53477144241333, "logps/chosen": -383.22998046875, "logps/rejected": -254.41238403320312, "loss": 0.5796, "rewards/accuracies": 0.625, "rewards/chosen": -0.2264101207256317, "rewards/margins": 0.6975858211517334, "rewards/rejected": -0.9239959120750427, "step": 5573 }, { "epoch": 0.64, "learning_rate": 1.0886105583518669e-07, "logits/chosen": -3.2129435539245605, "logits/rejected": -3.1654787063598633, "logps/chosen": -281.7437744140625, "logps/rejected": -430.1336975097656, "loss": 0.309, "rewards/accuracies": 0.875, "rewards/chosen": 0.05344715714454651, "rewards/margins": 1.4276573657989502, "rewards/rejected": -1.3742101192474365, "step": 5574 }, { "epoch": 0.64, "learning_rate": 1.0882593936556244e-07, "logits/chosen": -3.0444867610931396, "logits/rejected": -2.699658155441284, "logps/chosen": -110.93022155761719, "logps/rejected": -239.550537109375, "loss": 0.2859, "rewards/accuracies": 0.875, "rewards/chosen": 0.5824467539787292, "rewards/margins": 2.487058162689209, "rewards/rejected": -1.9046114683151245, "step": 5575 }, { "epoch": 0.64, "learning_rate": 1.087908228959382e-07, "logits/chosen": -3.68730092048645, "logits/rejected": -3.498046636581421, "logps/chosen": -401.29443359375, "logps/rejected": -299.57666015625, "loss": 0.7253, "rewards/accuracies": 0.875, "rewards/chosen": 0.08355779200792313, "rewards/margins": 0.5526366233825684, "rewards/rejected": -0.46907883882522583, "step": 5576 }, { "epoch": 0.64, "learning_rate": 1.0875570642631394e-07, "logits/chosen": -3.2425222396850586, "logits/rejected": -3.1226930618286133, "logps/chosen": -125.64039611816406, "logps/rejected": -172.56283569335938, "loss": 0.4, "rewards/accuracies": 0.875, "rewards/chosen": 0.8648499846458435, "rewards/margins": 2.569629669189453, "rewards/rejected": -1.704779863357544, "step": 5577 }, { "epoch": 0.64, "learning_rate": 1.0872058995668968e-07, "logits/chosen": -2.093470573425293, "logits/rejected": -2.1296279430389404, "logps/chosen": -211.73025512695312, "logps/rejected": -227.899169921875, "loss": 0.4287, "rewards/accuracies": 0.75, "rewards/chosen": -0.2163473665714264, "rewards/margins": 1.1292474269866943, "rewards/rejected": -1.3455947637557983, "step": 5578 }, { "epoch": 0.64, "learning_rate": 1.0868547348706542e-07, "logits/chosen": -3.247188091278076, "logits/rejected": -3.2769057750701904, "logps/chosen": -306.28924560546875, "logps/rejected": -282.15765380859375, "loss": 0.2376, "rewards/accuracies": 1.0, "rewards/chosen": -0.06101532280445099, "rewards/margins": 1.7930705547332764, "rewards/rejected": -1.854085922241211, "step": 5579 }, { "epoch": 0.64, "learning_rate": 1.0865035701744118e-07, "logits/chosen": -3.0781497955322266, "logits/rejected": -3.3715529441833496, "logps/chosen": -260.44757080078125, "logps/rejected": -310.0091247558594, "loss": 0.4823, "rewards/accuracies": 0.625, "rewards/chosen": -0.13576748967170715, "rewards/margins": 1.4480490684509277, "rewards/rejected": -1.5838165283203125, "step": 5580 }, { "epoch": 0.64, "learning_rate": 1.0861524054781692e-07, "logits/chosen": -2.9371581077575684, "logits/rejected": -2.5432183742523193, "logps/chosen": -304.43212890625, "logps/rejected": -186.59397888183594, "loss": 0.3481, "rewards/accuracies": 0.875, "rewards/chosen": -0.14453229308128357, "rewards/margins": 1.2859280109405518, "rewards/rejected": -1.4304603338241577, "step": 5581 }, { "epoch": 0.64, "learning_rate": 1.0858012407819267e-07, "logits/chosen": -2.3198423385620117, "logits/rejected": -2.144890069961548, "logps/chosen": -308.636962890625, "logps/rejected": -346.78814697265625, "loss": 0.3071, "rewards/accuracies": 0.875, "rewards/chosen": 0.2574445307254791, "rewards/margins": 2.143583059310913, "rewards/rejected": -1.8861385583877563, "step": 5582 }, { "epoch": 0.64, "learning_rate": 1.085450076085684e-07, "logits/chosen": -3.5493483543395996, "logits/rejected": -3.2361743450164795, "logps/chosen": -193.277099609375, "logps/rejected": -210.62564086914062, "loss": 0.5259, "rewards/accuracies": 0.75, "rewards/chosen": -0.2196657508611679, "rewards/margins": 1.386404275894165, "rewards/rejected": -1.6060700416564941, "step": 5583 }, { "epoch": 0.64, "learning_rate": 1.0850989113894416e-07, "logits/chosen": -3.3553285598754883, "logits/rejected": -3.612657308578491, "logps/chosen": -310.71856689453125, "logps/rejected": -184.47161865234375, "loss": 0.301, "rewards/accuracies": 0.75, "rewards/chosen": 0.09465652704238892, "rewards/margins": 1.9318721294403076, "rewards/rejected": -1.8372156620025635, "step": 5584 }, { "epoch": 0.64, "learning_rate": 1.0847477466931991e-07, "logits/chosen": -3.2950055599212646, "logits/rejected": -3.2470502853393555, "logps/chosen": -208.34292602539062, "logps/rejected": -193.65957641601562, "loss": 0.258, "rewards/accuracies": 0.875, "rewards/chosen": -0.22717347741127014, "rewards/margins": 1.8450474739074707, "rewards/rejected": -2.072220802307129, "step": 5585 }, { "epoch": 0.64, "learning_rate": 1.0843965819969565e-07, "logits/chosen": -3.690366268157959, "logits/rejected": -3.8127546310424805, "logps/chosen": -257.842529296875, "logps/rejected": -262.7574462890625, "loss": 0.593, "rewards/accuracies": 0.625, "rewards/chosen": -0.33239611983299255, "rewards/margins": 1.1095035076141357, "rewards/rejected": -1.4418996572494507, "step": 5586 }, { "epoch": 0.64, "learning_rate": 1.084045417300714e-07, "logits/chosen": -3.2315633296966553, "logits/rejected": -3.470195770263672, "logps/chosen": -190.51394653320312, "logps/rejected": -255.328857421875, "loss": 0.3484, "rewards/accuracies": 0.75, "rewards/chosen": -0.6538544297218323, "rewards/margins": 1.645661473274231, "rewards/rejected": -2.299515724182129, "step": 5587 }, { "epoch": 0.64, "learning_rate": 1.0836942526044715e-07, "logits/chosen": -2.4882209300994873, "logits/rejected": -2.4565813541412354, "logps/chosen": -328.62774658203125, "logps/rejected": -271.49755859375, "loss": 0.6423, "rewards/accuracies": 0.5, "rewards/chosen": -0.2788046598434448, "rewards/margins": 0.7455707788467407, "rewards/rejected": -1.024375557899475, "step": 5588 }, { "epoch": 0.64, "learning_rate": 1.0833430879082289e-07, "logits/chosen": -2.9080185890197754, "logits/rejected": -3.0100901126861572, "logps/chosen": -210.41696166992188, "logps/rejected": -238.1894989013672, "loss": 0.2806, "rewards/accuracies": 0.875, "rewards/chosen": -0.039103079587221146, "rewards/margins": 2.4273297786712646, "rewards/rejected": -2.466432809829712, "step": 5589 }, { "epoch": 0.64, "learning_rate": 1.0829919232119863e-07, "logits/chosen": -2.591337203979492, "logits/rejected": -3.032055377960205, "logps/chosen": -244.86566162109375, "logps/rejected": -195.8192138671875, "loss": 0.5524, "rewards/accuracies": 0.625, "rewards/chosen": -0.485135555267334, "rewards/margins": 0.6468579769134521, "rewards/rejected": -1.1319935321807861, "step": 5590 }, { "epoch": 0.64, "learning_rate": 1.0826407585157437e-07, "logits/chosen": -3.267516851425171, "logits/rejected": -2.9798011779785156, "logps/chosen": -268.37872314453125, "logps/rejected": -336.0381774902344, "loss": 0.148, "rewards/accuracies": 1.0, "rewards/chosen": 0.5347472429275513, "rewards/margins": 2.6066441535949707, "rewards/rejected": -2.071897029876709, "step": 5591 }, { "epoch": 0.64, "learning_rate": 1.0822895938195014e-07, "logits/chosen": -3.5827057361602783, "logits/rejected": -2.9033217430114746, "logps/chosen": -340.9233093261719, "logps/rejected": -256.255859375, "loss": 0.1634, "rewards/accuracies": 1.0, "rewards/chosen": -0.10278189927339554, "rewards/margins": 2.7689175605773926, "rewards/rejected": -2.871699333190918, "step": 5592 }, { "epoch": 0.64, "learning_rate": 1.0819384291232588e-07, "logits/chosen": -3.2179789543151855, "logits/rejected": -3.315683603286743, "logps/chosen": -256.3691711425781, "logps/rejected": -159.93289184570312, "loss": 0.2699, "rewards/accuracies": 1.0, "rewards/chosen": 0.7480844259262085, "rewards/margins": 1.8089112043380737, "rewards/rejected": -1.0608270168304443, "step": 5593 }, { "epoch": 0.64, "learning_rate": 1.0815872644270162e-07, "logits/chosen": -3.0654518604278564, "logits/rejected": -3.1645560264587402, "logps/chosen": -225.15603637695312, "logps/rejected": -164.1244659423828, "loss": 0.3489, "rewards/accuracies": 0.75, "rewards/chosen": -0.181911900639534, "rewards/margins": 1.6123943328857422, "rewards/rejected": -1.7943061590194702, "step": 5594 }, { "epoch": 0.64, "learning_rate": 1.0812360997307736e-07, "logits/chosen": -2.8024024963378906, "logits/rejected": -2.9924545288085938, "logps/chosen": -343.4334716796875, "logps/rejected": -222.19732666015625, "loss": 0.4127, "rewards/accuracies": 0.75, "rewards/chosen": 0.10730773210525513, "rewards/margins": 1.383644938468933, "rewards/rejected": -1.2763371467590332, "step": 5595 }, { "epoch": 0.65, "learning_rate": 1.0808849350345313e-07, "logits/chosen": -2.8340485095977783, "logits/rejected": -2.9356205463409424, "logps/chosen": -210.2417755126953, "logps/rejected": -352.7631530761719, "loss": 0.4619, "rewards/accuracies": 0.75, "rewards/chosen": -0.25834548473358154, "rewards/margins": 1.1179499626159668, "rewards/rejected": -1.3762953281402588, "step": 5596 }, { "epoch": 0.65, "learning_rate": 1.0805337703382887e-07, "logits/chosen": -2.8124303817749023, "logits/rejected": -2.8547677993774414, "logps/chosen": -212.1126251220703, "logps/rejected": -216.63223266601562, "loss": 0.2948, "rewards/accuracies": 1.0, "rewards/chosen": -0.010674461722373962, "rewards/margins": 1.3432278633117676, "rewards/rejected": -1.3539023399353027, "step": 5597 }, { "epoch": 0.65, "learning_rate": 1.0801826056420461e-07, "logits/chosen": -3.4194626808166504, "logits/rejected": -3.1343626976013184, "logps/chosen": -295.12030029296875, "logps/rejected": -317.6312255859375, "loss": 0.6399, "rewards/accuracies": 0.625, "rewards/chosen": -0.6855341196060181, "rewards/margins": 1.664825439453125, "rewards/rejected": -2.3503594398498535, "step": 5598 }, { "epoch": 0.65, "learning_rate": 1.0798314409458035e-07, "logits/chosen": -3.280465602874756, "logits/rejected": -3.305845260620117, "logps/chosen": -276.3464050292969, "logps/rejected": -231.20907592773438, "loss": 0.3184, "rewards/accuracies": 0.75, "rewards/chosen": 0.6885541677474976, "rewards/margins": 1.5935680866241455, "rewards/rejected": -0.905013918876648, "step": 5599 }, { "epoch": 0.65, "learning_rate": 1.079480276249561e-07, "logits/chosen": -2.777763605117798, "logits/rejected": -2.6619744300842285, "logps/chosen": -211.89588928222656, "logps/rejected": -220.45114135742188, "loss": 0.4555, "rewards/accuracies": 0.875, "rewards/chosen": 0.20355479419231415, "rewards/margins": 1.22539222240448, "rewards/rejected": -1.0218374729156494, "step": 5600 }, { "epoch": 0.65, "learning_rate": 1.0791291115533184e-07, "logits/chosen": -2.2357311248779297, "logits/rejected": -2.553327798843384, "logps/chosen": -427.9742736816406, "logps/rejected": -294.8554382324219, "loss": 0.1853, "rewards/accuracies": 1.0, "rewards/chosen": 0.0345647931098938, "rewards/margins": 2.150009870529175, "rewards/rejected": -2.115445137023926, "step": 5601 }, { "epoch": 0.65, "learning_rate": 1.078777946857076e-07, "logits/chosen": -3.3509554862976074, "logits/rejected": -3.012974500656128, "logps/chosen": -267.797607421875, "logps/rejected": -290.2547912597656, "loss": 0.4646, "rewards/accuracies": 0.75, "rewards/chosen": -1.0950521230697632, "rewards/margins": 0.997070848941803, "rewards/rejected": -2.092123031616211, "step": 5602 }, { "epoch": 0.65, "learning_rate": 1.0784267821608334e-07, "logits/chosen": -2.8489294052124023, "logits/rejected": -2.9960145950317383, "logps/chosen": -188.86260986328125, "logps/rejected": -252.52825927734375, "loss": 0.1979, "rewards/accuracies": 1.0, "rewards/chosen": 0.38133570551872253, "rewards/margins": 2.8531885147094727, "rewards/rejected": -2.4718527793884277, "step": 5603 }, { "epoch": 0.65, "learning_rate": 1.0780756174645909e-07, "logits/chosen": -3.2402541637420654, "logits/rejected": -3.009042263031006, "logps/chosen": -397.97711181640625, "logps/rejected": -321.26220703125, "loss": 0.401, "rewards/accuracies": 0.875, "rewards/chosen": -0.10091283917427063, "rewards/margins": 1.5222771167755127, "rewards/rejected": -1.6231898069381714, "step": 5604 }, { "epoch": 0.65, "learning_rate": 1.0777244527683483e-07, "logits/chosen": -3.549013137817383, "logits/rejected": -3.3744044303894043, "logps/chosen": -224.06903076171875, "logps/rejected": -120.34078979492188, "loss": 0.4514, "rewards/accuracies": 0.75, "rewards/chosen": -0.8129531741142273, "rewards/margins": 1.4392356872558594, "rewards/rejected": -2.2521889209747314, "step": 5605 }, { "epoch": 0.65, "learning_rate": 1.0773732880721057e-07, "logits/chosen": -3.1628262996673584, "logits/rejected": -3.16489577293396, "logps/chosen": -386.4945373535156, "logps/rejected": -245.6687469482422, "loss": 0.3294, "rewards/accuracies": 0.875, "rewards/chosen": 0.35679465532302856, "rewards/margins": 1.87200129032135, "rewards/rejected": -1.5152066946029663, "step": 5606 }, { "epoch": 0.65, "learning_rate": 1.0770221233758631e-07, "logits/chosen": -3.3239593505859375, "logits/rejected": -3.4630179405212402, "logps/chosen": -363.105712890625, "logps/rejected": -328.94775390625, "loss": 0.4745, "rewards/accuracies": 0.875, "rewards/chosen": -0.11541348695755005, "rewards/margins": 2.149893283843994, "rewards/rejected": -2.2653069496154785, "step": 5607 }, { "epoch": 0.65, "learning_rate": 1.0766709586796208e-07, "logits/chosen": -3.5925865173339844, "logits/rejected": -3.72599720954895, "logps/chosen": -365.674072265625, "logps/rejected": -188.48912048339844, "loss": 0.7316, "rewards/accuracies": 0.625, "rewards/chosen": -0.8029249310493469, "rewards/margins": 1.0370036363601685, "rewards/rejected": -1.8399286270141602, "step": 5608 }, { "epoch": 0.65, "learning_rate": 1.0763197939833782e-07, "logits/chosen": -2.567650318145752, "logits/rejected": -2.655116558074951, "logps/chosen": -380.6853942871094, "logps/rejected": -231.78065490722656, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": 0.7246997356414795, "rewards/margins": 2.237149238586426, "rewards/rejected": -1.5124496221542358, "step": 5609 }, { "epoch": 0.65, "learning_rate": 1.0759686292871356e-07, "logits/chosen": -3.4586851596832275, "logits/rejected": -3.451690673828125, "logps/chosen": -261.8019714355469, "logps/rejected": -297.08782958984375, "loss": 0.5052, "rewards/accuracies": 0.75, "rewards/chosen": -0.46848416328430176, "rewards/margins": 1.188066005706787, "rewards/rejected": -1.6565501689910889, "step": 5610 }, { "epoch": 0.65, "learning_rate": 1.075617464590893e-07, "logits/chosen": -3.149951934814453, "logits/rejected": -3.1391890048980713, "logps/chosen": -207.39163208007812, "logps/rejected": -298.48443603515625, "loss": 0.3342, "rewards/accuracies": 0.875, "rewards/chosen": 0.24646739661693573, "rewards/margins": 2.761017322540283, "rewards/rejected": -2.514549970626831, "step": 5611 }, { "epoch": 0.65, "learning_rate": 1.0752662998946506e-07, "logits/chosen": -2.6833510398864746, "logits/rejected": -2.3555638790130615, "logps/chosen": -84.63641357421875, "logps/rejected": -248.94668579101562, "loss": 0.4146, "rewards/accuracies": 0.625, "rewards/chosen": -0.06846611201763153, "rewards/margins": 1.8839759826660156, "rewards/rejected": -1.9524421691894531, "step": 5612 }, { "epoch": 0.65, "learning_rate": 1.0749151351984081e-07, "logits/chosen": -3.7128190994262695, "logits/rejected": -3.487022638320923, "logps/chosen": -144.5499267578125, "logps/rejected": -131.38104248046875, "loss": 0.2716, "rewards/accuracies": 1.0, "rewards/chosen": -0.2076992392539978, "rewards/margins": 1.8572113513946533, "rewards/rejected": -2.064910650253296, "step": 5613 }, { "epoch": 0.65, "learning_rate": 1.0745639705021655e-07, "logits/chosen": -2.8327040672302246, "logits/rejected": -2.8158223628997803, "logps/chosen": -300.1782531738281, "logps/rejected": -169.82550048828125, "loss": 0.3294, "rewards/accuracies": 0.875, "rewards/chosen": -0.11419486254453659, "rewards/margins": 2.0326669216156006, "rewards/rejected": -2.1468617916107178, "step": 5614 }, { "epoch": 0.65, "learning_rate": 1.0742128058059229e-07, "logits/chosen": -3.380664825439453, "logits/rejected": -3.621087074279785, "logps/chosen": -108.83056640625, "logps/rejected": -237.49118041992188, "loss": 0.3475, "rewards/accuracies": 0.75, "rewards/chosen": -0.362771213054657, "rewards/margins": 2.1301302909851074, "rewards/rejected": -2.492901563644409, "step": 5615 }, { "epoch": 0.65, "learning_rate": 1.0738616411096804e-07, "logits/chosen": -3.5697433948516846, "logits/rejected": -3.1161513328552246, "logps/chosen": -248.85910034179688, "logps/rejected": -304.90283203125, "loss": 0.1887, "rewards/accuracies": 0.875, "rewards/chosen": 0.30594247579574585, "rewards/margins": 2.8118603229522705, "rewards/rejected": -2.505917549133301, "step": 5616 }, { "epoch": 0.65, "learning_rate": 1.0735104764134379e-07, "logits/chosen": -3.0564703941345215, "logits/rejected": -2.877027988433838, "logps/chosen": -361.870361328125, "logps/rejected": -270.618408203125, "loss": 0.3359, "rewards/accuracies": 0.875, "rewards/chosen": 0.3858398497104645, "rewards/margins": 2.238760471343994, "rewards/rejected": -1.8529205322265625, "step": 5617 }, { "epoch": 0.65, "learning_rate": 1.0731593117171953e-07, "logits/chosen": -4.0963873863220215, "logits/rejected": -4.082793712615967, "logps/chosen": -290.6500549316406, "logps/rejected": -279.9942932128906, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034888237714767456, "rewards/margins": 3.2841386795043945, "rewards/rejected": -3.287627696990967, "step": 5618 }, { "epoch": 0.65, "learning_rate": 1.0728081470209528e-07, "logits/chosen": -3.0906553268432617, "logits/rejected": -2.7943954467773438, "logps/chosen": -425.14691162109375, "logps/rejected": -275.3019104003906, "loss": 0.6221, "rewards/accuracies": 0.625, "rewards/chosen": -0.5881611108779907, "rewards/margins": 1.158515214920044, "rewards/rejected": -1.7466763257980347, "step": 5619 }, { "epoch": 0.65, "learning_rate": 1.0724569823247102e-07, "logits/chosen": -3.1494131088256836, "logits/rejected": -3.066164016723633, "logps/chosen": -320.2164611816406, "logps/rejected": -295.4013977050781, "loss": 0.4188, "rewards/accuracies": 0.75, "rewards/chosen": -0.3534978926181793, "rewards/margins": 2.1844289302825928, "rewards/rejected": -2.5379269123077393, "step": 5620 }, { "epoch": 0.65, "learning_rate": 1.0721058176284677e-07, "logits/chosen": -3.0796542167663574, "logits/rejected": -2.746368885040283, "logps/chosen": -246.46578979492188, "logps/rejected": -277.140380859375, "loss": 0.2437, "rewards/accuracies": 0.875, "rewards/chosen": -0.15386377274990082, "rewards/margins": 2.5156631469726562, "rewards/rejected": -2.6695268154144287, "step": 5621 }, { "epoch": 0.65, "learning_rate": 1.0717546529322252e-07, "logits/chosen": -3.2416837215423584, "logits/rejected": -3.155458450317383, "logps/chosen": -312.3352355957031, "logps/rejected": -269.7440490722656, "loss": 0.3523, "rewards/accuracies": 0.75, "rewards/chosen": -0.3387162983417511, "rewards/margins": 1.5783562660217285, "rewards/rejected": -1.9170725345611572, "step": 5622 }, { "epoch": 0.65, "learning_rate": 1.0714034882359826e-07, "logits/chosen": -3.30324125289917, "logits/rejected": -3.296976089477539, "logps/chosen": -161.32293701171875, "logps/rejected": -416.4250183105469, "loss": 0.484, "rewards/accuracies": 0.625, "rewards/chosen": -0.3976821303367615, "rewards/margins": 1.7413086891174316, "rewards/rejected": -2.138990879058838, "step": 5623 }, { "epoch": 0.65, "learning_rate": 1.07105232353974e-07, "logits/chosen": -3.6000545024871826, "logits/rejected": -3.2748231887817383, "logps/chosen": -282.6176452636719, "logps/rejected": -345.45068359375, "loss": 0.4239, "rewards/accuracies": 0.875, "rewards/chosen": 0.42987963557243347, "rewards/margins": 1.4026055335998535, "rewards/rejected": -0.9727257490158081, "step": 5624 }, { "epoch": 0.65, "learning_rate": 1.0707011588434976e-07, "logits/chosen": -2.7481307983398438, "logits/rejected": -2.9967517852783203, "logps/chosen": -440.8220520019531, "logps/rejected": -239.9080810546875, "loss": 0.4028, "rewards/accuracies": 0.875, "rewards/chosen": -0.16004885733127594, "rewards/margins": 1.4254229068756104, "rewards/rejected": -1.585471749305725, "step": 5625 }, { "epoch": 0.65, "learning_rate": 1.070349994147255e-07, "logits/chosen": -2.6817069053649902, "logits/rejected": -2.652165412902832, "logps/chosen": -268.5359191894531, "logps/rejected": -302.2108154296875, "loss": 0.5902, "rewards/accuracies": 0.625, "rewards/chosen": -0.042094022035598755, "rewards/margins": 0.8159489631652832, "rewards/rejected": -0.8580430150032043, "step": 5626 }, { "epoch": 0.65, "learning_rate": 1.0699988294510124e-07, "logits/chosen": -3.9612483978271484, "logits/rejected": -3.791958808898926, "logps/chosen": -124.39517211914062, "logps/rejected": -113.29580688476562, "loss": 0.5354, "rewards/accuracies": 0.75, "rewards/chosen": -0.43146008253097534, "rewards/margins": 0.8343965411186218, "rewards/rejected": -1.2658566236495972, "step": 5627 }, { "epoch": 0.65, "learning_rate": 1.0696476647547699e-07, "logits/chosen": -3.05950665473938, "logits/rejected": -3.152378559112549, "logps/chosen": -153.36898803710938, "logps/rejected": -193.42507934570312, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": 0.5413777828216553, "rewards/margins": 2.2741198539733887, "rewards/rejected": -1.732742190361023, "step": 5628 }, { "epoch": 0.65, "learning_rate": 1.0692965000585274e-07, "logits/chosen": -2.5198967456817627, "logits/rejected": -2.5737547874450684, "logps/chosen": -244.37142944335938, "logps/rejected": -240.4387664794922, "loss": 0.1569, "rewards/accuracies": 0.875, "rewards/chosen": -0.01401326060295105, "rewards/margins": 2.80594539642334, "rewards/rejected": -2.8199586868286133, "step": 5629 }, { "epoch": 0.65, "learning_rate": 1.0689453353622849e-07, "logits/chosen": -2.3849613666534424, "logits/rejected": -2.583472967147827, "logps/chosen": -263.3632507324219, "logps/rejected": -233.72970581054688, "loss": 0.3461, "rewards/accuracies": 0.875, "rewards/chosen": -0.38611477613449097, "rewards/margins": 1.4344377517700195, "rewards/rejected": -1.8205524682998657, "step": 5630 }, { "epoch": 0.65, "learning_rate": 1.0685941706660423e-07, "logits/chosen": -3.03352952003479, "logits/rejected": -3.284853219985962, "logps/chosen": -275.52435302734375, "logps/rejected": -236.57351684570312, "loss": 0.3627, "rewards/accuracies": 0.875, "rewards/chosen": -0.27701035141944885, "rewards/margins": 1.753282070159912, "rewards/rejected": -2.030292510986328, "step": 5631 }, { "epoch": 0.65, "learning_rate": 1.0682430059697997e-07, "logits/chosen": -2.5650057792663574, "logits/rejected": -2.698627471923828, "logps/chosen": -169.54940795898438, "logps/rejected": -238.5128173828125, "loss": 0.5999, "rewards/accuracies": 0.75, "rewards/chosen": -0.41020867228507996, "rewards/margins": 1.0185129642486572, "rewards/rejected": -1.42872154712677, "step": 5632 }, { "epoch": 0.65, "learning_rate": 1.0678918412735573e-07, "logits/chosen": -3.33251690864563, "logits/rejected": -3.1733808517456055, "logps/chosen": -271.3424987792969, "logps/rejected": -320.1819152832031, "loss": 0.4933, "rewards/accuracies": 0.75, "rewards/chosen": -0.43721261620521545, "rewards/margins": 0.9759700298309326, "rewards/rejected": -1.4131826162338257, "step": 5633 }, { "epoch": 0.65, "learning_rate": 1.0675406765773147e-07, "logits/chosen": -3.4792590141296387, "logits/rejected": -3.5824410915374756, "logps/chosen": -277.6376037597656, "logps/rejected": -299.1560363769531, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": 0.018851593136787415, "rewards/margins": 3.747931957244873, "rewards/rejected": -3.7290802001953125, "step": 5634 }, { "epoch": 0.65, "learning_rate": 1.0671895118810721e-07, "logits/chosen": -2.170626640319824, "logits/rejected": -2.1167328357696533, "logps/chosen": -118.99354553222656, "logps/rejected": -225.41033935546875, "loss": 0.3474, "rewards/accuracies": 0.875, "rewards/chosen": 0.061789363622665405, "rewards/margins": 1.9839577674865723, "rewards/rejected": -1.922168254852295, "step": 5635 }, { "epoch": 0.65, "learning_rate": 1.0668383471848296e-07, "logits/chosen": -3.1704862117767334, "logits/rejected": -2.985698938369751, "logps/chosen": -293.5659484863281, "logps/rejected": -258.3111267089844, "loss": 0.1343, "rewards/accuracies": 1.0, "rewards/chosen": 0.7479022145271301, "rewards/margins": 2.3396358489990234, "rewards/rejected": -1.5917335748672485, "step": 5636 }, { "epoch": 0.65, "learning_rate": 1.0664871824885872e-07, "logits/chosen": -3.299772262573242, "logits/rejected": -3.2477047443389893, "logps/chosen": -133.55397033691406, "logps/rejected": -224.42312622070312, "loss": 0.2976, "rewards/accuracies": 0.875, "rewards/chosen": -0.8161277770996094, "rewards/margins": 2.388042449951172, "rewards/rejected": -3.2041702270507812, "step": 5637 }, { "epoch": 0.65, "learning_rate": 1.0661360177923446e-07, "logits/chosen": -3.3448681831359863, "logits/rejected": -3.331289291381836, "logps/chosen": -389.712890625, "logps/rejected": -331.9437255859375, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": -0.0990728810429573, "rewards/margins": 2.878101348876953, "rewards/rejected": -2.9771742820739746, "step": 5638 }, { "epoch": 0.65, "learning_rate": 1.065784853096102e-07, "logits/chosen": -2.780048370361328, "logits/rejected": -2.751544237136841, "logps/chosen": -244.10482788085938, "logps/rejected": -404.24127197265625, "loss": 0.3062, "rewards/accuracies": 0.875, "rewards/chosen": 0.2737772762775421, "rewards/margins": 1.9931374788284302, "rewards/rejected": -1.7193602323532104, "step": 5639 }, { "epoch": 0.65, "learning_rate": 1.0654336883998594e-07, "logits/chosen": -3.902237892150879, "logits/rejected": -3.9272589683532715, "logps/chosen": -316.08642578125, "logps/rejected": -238.65634155273438, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.2896531820297241, "rewards/margins": 1.7634758949279785, "rewards/rejected": -2.053129196166992, "step": 5640 }, { "epoch": 0.65, "learning_rate": 1.065082523703617e-07, "logits/chosen": -2.7935855388641357, "logits/rejected": -3.0234084129333496, "logps/chosen": -237.99130249023438, "logps/rejected": -180.79141235351562, "loss": 0.3619, "rewards/accuracies": 0.75, "rewards/chosen": 0.2731037735939026, "rewards/margins": 1.7001707553863525, "rewards/rejected": -1.4270669221878052, "step": 5641 }, { "epoch": 0.65, "learning_rate": 1.0647313590073745e-07, "logits/chosen": -2.949439764022827, "logits/rejected": -2.7813570499420166, "logps/chosen": -310.2611999511719, "logps/rejected": -259.5787048339844, "loss": 0.1304, "rewards/accuracies": 1.0, "rewards/chosen": 0.35151568055152893, "rewards/margins": 3.0993924140930176, "rewards/rejected": -2.7478768825531006, "step": 5642 }, { "epoch": 0.65, "learning_rate": 1.0643801943111319e-07, "logits/chosen": -2.239032745361328, "logits/rejected": -2.5805978775024414, "logps/chosen": -351.34051513671875, "logps/rejected": -339.5082092285156, "loss": 0.2077, "rewards/accuracies": 1.0, "rewards/chosen": 0.7640773057937622, "rewards/margins": 2.4078924655914307, "rewards/rejected": -1.6438151597976685, "step": 5643 }, { "epoch": 0.65, "learning_rate": 1.0640290296148893e-07, "logits/chosen": -3.7529730796813965, "logits/rejected": -3.305634021759033, "logps/chosen": -203.53274536132812, "logps/rejected": -117.1561050415039, "loss": 0.7392, "rewards/accuracies": 0.5, "rewards/chosen": 0.030054934322834015, "rewards/margins": 0.595278799533844, "rewards/rejected": -0.5652239322662354, "step": 5644 }, { "epoch": 0.65, "learning_rate": 1.0636778649186468e-07, "logits/chosen": -3.780831813812256, "logits/rejected": -3.5922584533691406, "logps/chosen": -258.83477783203125, "logps/rejected": -280.8088684082031, "loss": 0.7261, "rewards/accuracies": 0.375, "rewards/chosen": -0.5713047981262207, "rewards/margins": 1.3562636375427246, "rewards/rejected": -1.9275684356689453, "step": 5645 }, { "epoch": 0.65, "learning_rate": 1.0633267002224042e-07, "logits/chosen": -3.3099231719970703, "logits/rejected": -3.187441349029541, "logps/chosen": -235.12351989746094, "logps/rejected": -298.9351501464844, "loss": 0.29, "rewards/accuracies": 0.75, "rewards/chosen": -0.0494069866836071, "rewards/margins": 2.3913867473602295, "rewards/rejected": -2.440793752670288, "step": 5646 }, { "epoch": 0.65, "learning_rate": 1.0629755355261618e-07, "logits/chosen": -4.01441764831543, "logits/rejected": -3.8159525394439697, "logps/chosen": -139.42787170410156, "logps/rejected": -159.613525390625, "loss": 0.5203, "rewards/accuracies": 0.625, "rewards/chosen": -0.14440304040908813, "rewards/margins": 1.1954987049102783, "rewards/rejected": -1.3399016857147217, "step": 5647 }, { "epoch": 0.65, "learning_rate": 1.0626243708299192e-07, "logits/chosen": -2.5943546295166016, "logits/rejected": -2.703242778778076, "logps/chosen": -214.1205291748047, "logps/rejected": -234.7897186279297, "loss": 0.585, "rewards/accuracies": 0.625, "rewards/chosen": -0.1280806064605713, "rewards/margins": 1.4257488250732422, "rewards/rejected": -1.553829312324524, "step": 5648 }, { "epoch": 0.65, "learning_rate": 1.0622732061336767e-07, "logits/chosen": -3.1391568183898926, "logits/rejected": -3.265798807144165, "logps/chosen": -401.6448974609375, "logps/rejected": -390.2916259765625, "loss": 0.4921, "rewards/accuracies": 0.625, "rewards/chosen": 0.4184234142303467, "rewards/margins": 1.970555305480957, "rewards/rejected": -1.5521318912506104, "step": 5649 }, { "epoch": 0.65, "learning_rate": 1.0619220414374341e-07, "logits/chosen": -3.4210269451141357, "logits/rejected": -3.4223904609680176, "logps/chosen": -240.19163513183594, "logps/rejected": -282.5373229980469, "loss": 0.649, "rewards/accuracies": 0.625, "rewards/chosen": -0.4499392509460449, "rewards/margins": 1.1601508855819702, "rewards/rejected": -1.6100900173187256, "step": 5650 }, { "epoch": 0.65, "learning_rate": 1.0615708767411915e-07, "logits/chosen": -3.1556801795959473, "logits/rejected": -2.656970977783203, "logps/chosen": -272.407470703125, "logps/rejected": -272.4090576171875, "loss": 0.3913, "rewards/accuracies": 0.625, "rewards/chosen": 0.09946460276842117, "rewards/margins": 2.429797649383545, "rewards/rejected": -2.3303327560424805, "step": 5651 }, { "epoch": 0.65, "learning_rate": 1.0612197120449489e-07, "logits/chosen": -2.8886561393737793, "logits/rejected": -2.7998502254486084, "logps/chosen": -196.05117797851562, "logps/rejected": -508.733154296875, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": 0.40366441011428833, "rewards/margins": 3.828294277191162, "rewards/rejected": -3.4246296882629395, "step": 5652 }, { "epoch": 0.65, "learning_rate": 1.0608685473487066e-07, "logits/chosen": -3.5574193000793457, "logits/rejected": -3.5740721225738525, "logps/chosen": -400.5965881347656, "logps/rejected": -244.47166442871094, "loss": 0.4949, "rewards/accuracies": 0.875, "rewards/chosen": 0.05485007166862488, "rewards/margins": 1.314725637435913, "rewards/rejected": -1.2598754167556763, "step": 5653 }, { "epoch": 0.65, "learning_rate": 1.060517382652464e-07, "logits/chosen": -2.7622063159942627, "logits/rejected": -3.0911450386047363, "logps/chosen": -371.63128662109375, "logps/rejected": -325.51800537109375, "loss": 0.1615, "rewards/accuracies": 1.0, "rewards/chosen": 0.2772482633590698, "rewards/margins": 3.532459020614624, "rewards/rejected": -3.2552106380462646, "step": 5654 }, { "epoch": 0.65, "learning_rate": 1.0601662179562214e-07, "logits/chosen": -2.849799156188965, "logits/rejected": -2.7424912452697754, "logps/chosen": -258.6644592285156, "logps/rejected": -427.9648132324219, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": 0.02935117483139038, "rewards/margins": 2.8424487113952637, "rewards/rejected": -2.8130974769592285, "step": 5655 }, { "epoch": 0.65, "learning_rate": 1.0598150532599788e-07, "logits/chosen": -2.6591079235076904, "logits/rejected": -2.7387495040893555, "logps/chosen": -286.99951171875, "logps/rejected": -226.531494140625, "loss": 0.3834, "rewards/accuracies": 0.875, "rewards/chosen": -0.43943923711776733, "rewards/margins": 1.1658662557601929, "rewards/rejected": -1.605305552482605, "step": 5656 }, { "epoch": 0.65, "learning_rate": 1.0594638885637364e-07, "logits/chosen": -3.119502067565918, "logits/rejected": -2.653235912322998, "logps/chosen": -251.0325927734375, "logps/rejected": -270.8792724609375, "loss": 0.3374, "rewards/accuracies": 0.875, "rewards/chosen": -0.2780793011188507, "rewards/margins": 2.130380153656006, "rewards/rejected": -2.408459424972534, "step": 5657 }, { "epoch": 0.65, "learning_rate": 1.0591127238674939e-07, "logits/chosen": -2.950901985168457, "logits/rejected": -3.069645404815674, "logps/chosen": -145.67153930664062, "logps/rejected": -144.027587890625, "loss": 0.4273, "rewards/accuracies": 0.75, "rewards/chosen": 0.3278617560863495, "rewards/margins": 1.1225440502166748, "rewards/rejected": -0.7946822643280029, "step": 5658 }, { "epoch": 0.65, "learning_rate": 1.0587615591712513e-07, "logits/chosen": -3.4326488971710205, "logits/rejected": -3.471013307571411, "logps/chosen": -124.53093719482422, "logps/rejected": -223.44517517089844, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": 0.16564786434173584, "rewards/margins": 2.682650566101074, "rewards/rejected": -2.517002820968628, "step": 5659 }, { "epoch": 0.65, "learning_rate": 1.0584103944750087e-07, "logits/chosen": -3.178682804107666, "logits/rejected": -3.2476272583007812, "logps/chosen": -321.0895080566406, "logps/rejected": -253.02748107910156, "loss": 0.2562, "rewards/accuracies": 0.875, "rewards/chosen": 0.14146476984024048, "rewards/margins": 2.842315912246704, "rewards/rejected": -2.7008509635925293, "step": 5660 }, { "epoch": 0.65, "learning_rate": 1.0580592297787662e-07, "logits/chosen": -3.033768653869629, "logits/rejected": -2.892059564590454, "logps/chosen": -222.59097290039062, "logps/rejected": -168.8028564453125, "loss": 0.4481, "rewards/accuracies": 0.75, "rewards/chosen": 0.06116996705532074, "rewards/margins": 0.9721100926399231, "rewards/rejected": -0.9109401702880859, "step": 5661 }, { "epoch": 0.65, "learning_rate": 1.0577080650825236e-07, "logits/chosen": -2.9500136375427246, "logits/rejected": -2.8467724323272705, "logps/chosen": -255.1674041748047, "logps/rejected": -290.8746337890625, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": 0.28930673003196716, "rewards/margins": 2.3265459537506104, "rewards/rejected": -2.0372393131256104, "step": 5662 }, { "epoch": 0.65, "learning_rate": 1.057356900386281e-07, "logits/chosen": -2.848926067352295, "logits/rejected": -2.918388843536377, "logps/chosen": -402.5325927734375, "logps/rejected": -264.5010681152344, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": 0.16099755465984344, "rewards/margins": 2.3789467811584473, "rewards/rejected": -2.217949390411377, "step": 5663 }, { "epoch": 0.65, "learning_rate": 1.0570057356900386e-07, "logits/chosen": -3.0099682807922363, "logits/rejected": -2.885167121887207, "logps/chosen": -236.37689208984375, "logps/rejected": -362.9037170410156, "loss": 0.4405, "rewards/accuracies": 0.75, "rewards/chosen": -0.3657780885696411, "rewards/margins": 1.1091043949127197, "rewards/rejected": -1.4748824834823608, "step": 5664 }, { "epoch": 0.65, "learning_rate": 1.0566545709937961e-07, "logits/chosen": -2.9213805198669434, "logits/rejected": -3.078748941421509, "logps/chosen": -183.796875, "logps/rejected": -229.20748901367188, "loss": 0.2711, "rewards/accuracies": 1.0, "rewards/chosen": -0.5323242545127869, "rewards/margins": 1.8985296487808228, "rewards/rejected": -2.430853843688965, "step": 5665 }, { "epoch": 0.65, "learning_rate": 1.0563034062975535e-07, "logits/chosen": -3.6523122787475586, "logits/rejected": -3.3650810718536377, "logps/chosen": -192.48849487304688, "logps/rejected": -154.14117431640625, "loss": 0.456, "rewards/accuracies": 0.75, "rewards/chosen": -0.1224488765001297, "rewards/margins": 1.600853443145752, "rewards/rejected": -1.7233024835586548, "step": 5666 }, { "epoch": 0.65, "learning_rate": 1.055952241601311e-07, "logits/chosen": -3.1325795650482178, "logits/rejected": -2.974757671356201, "logps/chosen": -222.03225708007812, "logps/rejected": -437.45953369140625, "loss": 0.2539, "rewards/accuracies": 1.0, "rewards/chosen": -0.08758679032325745, "rewards/margins": 2.350001811981201, "rewards/rejected": -2.4375884532928467, "step": 5667 }, { "epoch": 0.65, "learning_rate": 1.0556010769050684e-07, "logits/chosen": -2.751160144805908, "logits/rejected": -2.456346035003662, "logps/chosen": -323.395751953125, "logps/rejected": -310.3280029296875, "loss": 0.5966, "rewards/accuracies": 0.625, "rewards/chosen": -0.2822277843952179, "rewards/margins": 0.33945244550704956, "rewards/rejected": -0.6216802597045898, "step": 5668 }, { "epoch": 0.65, "learning_rate": 1.0552499122088258e-07, "logits/chosen": -2.8875417709350586, "logits/rejected": -2.777507781982422, "logps/chosen": -214.64027404785156, "logps/rejected": -267.35870361328125, "loss": 0.6261, "rewards/accuracies": 0.5, "rewards/chosen": -0.4231797456741333, "rewards/margins": 0.5762326717376709, "rewards/rejected": -0.9994123578071594, "step": 5669 }, { "epoch": 0.65, "learning_rate": 1.0548987475125834e-07, "logits/chosen": -3.3265933990478516, "logits/rejected": -3.226207971572876, "logps/chosen": -543.3410034179688, "logps/rejected": -664.228271484375, "loss": 0.3178, "rewards/accuracies": 0.75, "rewards/chosen": 0.28617334365844727, "rewards/margins": 1.7684576511383057, "rewards/rejected": -1.4822843074798584, "step": 5670 }, { "epoch": 0.65, "learning_rate": 1.0545475828163408e-07, "logits/chosen": -3.2650251388549805, "logits/rejected": -3.645474433898926, "logps/chosen": -249.3783416748047, "logps/rejected": -189.82521057128906, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": 0.2167520523071289, "rewards/margins": 1.931557536125183, "rewards/rejected": -1.7148054838180542, "step": 5671 }, { "epoch": 0.65, "learning_rate": 1.0541964181200982e-07, "logits/chosen": -3.2366621494293213, "logits/rejected": -3.22756290435791, "logps/chosen": -224.30191040039062, "logps/rejected": -176.5642547607422, "loss": 0.228, "rewards/accuracies": 0.875, "rewards/chosen": 0.3034258484840393, "rewards/margins": 3.0575666427612305, "rewards/rejected": -2.754140853881836, "step": 5672 }, { "epoch": 0.65, "learning_rate": 1.0538452534238556e-07, "logits/chosen": -3.0748085975646973, "logits/rejected": -3.197624921798706, "logps/chosen": -197.33633422851562, "logps/rejected": -222.533447265625, "loss": 0.2329, "rewards/accuracies": 1.0, "rewards/chosen": -0.00991755723953247, "rewards/margins": 2.039431095123291, "rewards/rejected": -2.0493483543395996, "step": 5673 }, { "epoch": 0.65, "learning_rate": 1.0534940887276132e-07, "logits/chosen": -2.5382986068725586, "logits/rejected": -2.4201886653900146, "logps/chosen": -224.68167114257812, "logps/rejected": -251.83700561523438, "loss": 0.3632, "rewards/accuracies": 0.875, "rewards/chosen": -0.2533620595932007, "rewards/margins": 1.2374414205551147, "rewards/rejected": -1.4908034801483154, "step": 5674 }, { "epoch": 0.65, "learning_rate": 1.0531429240313707e-07, "logits/chosen": -3.029167652130127, "logits/rejected": -2.9935715198516846, "logps/chosen": -452.415771484375, "logps/rejected": -330.2823486328125, "loss": 0.3319, "rewards/accuracies": 0.75, "rewards/chosen": -0.1379069685935974, "rewards/margins": 1.765679955482483, "rewards/rejected": -1.9035868644714355, "step": 5675 }, { "epoch": 0.65, "learning_rate": 1.0527917593351281e-07, "logits/chosen": -2.7963755130767822, "logits/rejected": -2.789581537246704, "logps/chosen": -275.1109924316406, "logps/rejected": -298.79852294921875, "loss": 0.2468, "rewards/accuracies": 1.0, "rewards/chosen": 0.16254812479019165, "rewards/margins": 2.0897903442382812, "rewards/rejected": -1.9272422790527344, "step": 5676 }, { "epoch": 0.65, "learning_rate": 1.0524405946388855e-07, "logits/chosen": -3.1262917518615723, "logits/rejected": -3.1405839920043945, "logps/chosen": -148.9027557373047, "logps/rejected": -147.8314208984375, "loss": 0.5529, "rewards/accuracies": 0.625, "rewards/chosen": -0.8513997793197632, "rewards/margins": 0.7240864634513855, "rewards/rejected": -1.575486183166504, "step": 5677 }, { "epoch": 0.65, "learning_rate": 1.0520894299426431e-07, "logits/chosen": -2.790769338607788, "logits/rejected": -2.8445677757263184, "logps/chosen": -239.002197265625, "logps/rejected": -241.6929168701172, "loss": 0.5074, "rewards/accuracies": 0.875, "rewards/chosen": -0.5344011187553406, "rewards/margins": 1.1009798049926758, "rewards/rejected": -1.6353809833526611, "step": 5678 }, { "epoch": 0.65, "learning_rate": 1.0517382652464005e-07, "logits/chosen": -3.7280218601226807, "logits/rejected": -3.8529036045074463, "logps/chosen": -210.79376220703125, "logps/rejected": -205.57794189453125, "loss": 0.4616, "rewards/accuracies": 0.75, "rewards/chosen": 0.2839612662792206, "rewards/margins": 1.8908631801605225, "rewards/rejected": -1.6069018840789795, "step": 5679 }, { "epoch": 0.65, "learning_rate": 1.0513871005501579e-07, "logits/chosen": -3.0787436962127686, "logits/rejected": -3.214360237121582, "logps/chosen": -266.3089904785156, "logps/rejected": -291.978759765625, "loss": 0.2913, "rewards/accuracies": 0.75, "rewards/chosen": 0.1352439820766449, "rewards/margins": 3.3252556324005127, "rewards/rejected": -3.190011739730835, "step": 5680 }, { "epoch": 0.65, "learning_rate": 1.0510359358539154e-07, "logits/chosen": -3.4405646324157715, "logits/rejected": -3.6403346061706543, "logps/chosen": -78.33030700683594, "logps/rejected": -171.14923095703125, "loss": 0.2724, "rewards/accuracies": 0.875, "rewards/chosen": 0.08642993867397308, "rewards/margins": 2.9304428100585938, "rewards/rejected": -2.84401273727417, "step": 5681 }, { "epoch": 0.66, "learning_rate": 1.050684771157673e-07, "logits/chosen": -3.1489312648773193, "logits/rejected": -3.263981342315674, "logps/chosen": -166.10752868652344, "logps/rejected": -216.5714569091797, "loss": 0.5901, "rewards/accuracies": 0.625, "rewards/chosen": -0.11411190032958984, "rewards/margins": 0.6056146621704102, "rewards/rejected": -0.7197266221046448, "step": 5682 }, { "epoch": 0.66, "learning_rate": 1.0503336064614304e-07, "logits/chosen": -3.284754991531372, "logits/rejected": -3.1118085384368896, "logps/chosen": -246.32818603515625, "logps/rejected": -215.7868194580078, "loss": 0.5536, "rewards/accuracies": 0.625, "rewards/chosen": -0.16461071372032166, "rewards/margins": 0.9271101951599121, "rewards/rejected": -1.0917208194732666, "step": 5683 }, { "epoch": 0.66, "learning_rate": 1.0499824417651878e-07, "logits/chosen": -2.8899500370025635, "logits/rejected": -2.9293642044067383, "logps/chosen": -97.51370239257812, "logps/rejected": -173.61080932617188, "loss": 0.2343, "rewards/accuracies": 1.0, "rewards/chosen": 0.18274614214897156, "rewards/margins": 2.240851640701294, "rewards/rejected": -2.05810546875, "step": 5684 }, { "epoch": 0.66, "learning_rate": 1.0496312770689452e-07, "logits/chosen": -3.6179728507995605, "logits/rejected": -3.8891453742980957, "logps/chosen": -321.82598876953125, "logps/rejected": -278.4671325683594, "loss": 0.5094, "rewards/accuracies": 0.75, "rewards/chosen": -0.6125072240829468, "rewards/margins": 1.3180747032165527, "rewards/rejected": -1.9305819272994995, "step": 5685 }, { "epoch": 0.66, "learning_rate": 1.0492801123727029e-07, "logits/chosen": -3.957486867904663, "logits/rejected": -3.761627435684204, "logps/chosen": -147.50839233398438, "logps/rejected": -119.27775573730469, "loss": 0.5428, "rewards/accuracies": 0.875, "rewards/chosen": -0.18673869967460632, "rewards/margins": 1.2834882736206055, "rewards/rejected": -1.4702270030975342, "step": 5686 }, { "epoch": 0.66, "learning_rate": 1.0489289476764603e-07, "logits/chosen": -2.319512128829956, "logits/rejected": -2.418222427368164, "logps/chosen": -142.8349609375, "logps/rejected": -245.02413940429688, "loss": 0.1976, "rewards/accuracies": 1.0, "rewards/chosen": -0.24598731100559235, "rewards/margins": 2.870877504348755, "rewards/rejected": -3.1168649196624756, "step": 5687 }, { "epoch": 0.66, "learning_rate": 1.0485777829802177e-07, "logits/chosen": -3.803910970687866, "logits/rejected": -3.607776403427124, "logps/chosen": -163.85333251953125, "logps/rejected": -266.318115234375, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.5149109959602356, "rewards/margins": 2.472567558288574, "rewards/rejected": -1.9576565027236938, "step": 5688 }, { "epoch": 0.66, "learning_rate": 1.0482266182839751e-07, "logits/chosen": -2.8760786056518555, "logits/rejected": -2.9410862922668457, "logps/chosen": -212.697509765625, "logps/rejected": -320.9857177734375, "loss": 0.2641, "rewards/accuracies": 0.875, "rewards/chosen": 0.1762629747390747, "rewards/margins": 3.213963508605957, "rewards/rejected": -3.037700653076172, "step": 5689 }, { "epoch": 0.66, "learning_rate": 1.0478754535877326e-07, "logits/chosen": -3.1751739978790283, "logits/rejected": -3.032536506652832, "logps/chosen": -267.23199462890625, "logps/rejected": -279.4087829589844, "loss": 0.5959, "rewards/accuracies": 0.75, "rewards/chosen": -0.37167856097221375, "rewards/margins": 1.4079164266586304, "rewards/rejected": -1.7795950174331665, "step": 5690 }, { "epoch": 0.66, "learning_rate": 1.04752428889149e-07, "logits/chosen": -3.249744415283203, "logits/rejected": -3.4339518547058105, "logps/chosen": -183.49972534179688, "logps/rejected": -215.670166015625, "loss": 0.6862, "rewards/accuracies": 0.5, "rewards/chosen": -0.08495617657899857, "rewards/margins": 1.819348692893982, "rewards/rejected": -1.9043047428131104, "step": 5691 }, { "epoch": 0.66, "learning_rate": 1.0471731241952476e-07, "logits/chosen": -3.664407253265381, "logits/rejected": -3.8349153995513916, "logps/chosen": -217.32522583007812, "logps/rejected": -290.8777770996094, "loss": 0.3375, "rewards/accuracies": 0.75, "rewards/chosen": 0.3529944121837616, "rewards/margins": 2.1300225257873535, "rewards/rejected": -1.77702796459198, "step": 5692 }, { "epoch": 0.66, "learning_rate": 1.046821959499005e-07, "logits/chosen": -2.8151869773864746, "logits/rejected": -2.9243216514587402, "logps/chosen": -362.2946472167969, "logps/rejected": -243.14352416992188, "loss": 0.2589, "rewards/accuracies": 1.0, "rewards/chosen": 0.7825701236724854, "rewards/margins": 2.1367433071136475, "rewards/rejected": -1.354173183441162, "step": 5693 }, { "epoch": 0.66, "learning_rate": 1.0464707948027625e-07, "logits/chosen": -3.183488130569458, "logits/rejected": -3.603588342666626, "logps/chosen": -138.3988800048828, "logps/rejected": -226.831787109375, "loss": 0.3695, "rewards/accuracies": 0.875, "rewards/chosen": 0.1802050769329071, "rewards/margins": 2.0392398834228516, "rewards/rejected": -1.859034776687622, "step": 5694 }, { "epoch": 0.66, "learning_rate": 1.0461196301065199e-07, "logits/chosen": -3.4468328952789307, "logits/rejected": -3.568474054336548, "logps/chosen": -124.92695617675781, "logps/rejected": -355.1290283203125, "loss": 0.4872, "rewards/accuracies": 0.75, "rewards/chosen": 0.12237244844436646, "rewards/margins": 1.5683296918869019, "rewards/rejected": -1.4459571838378906, "step": 5695 }, { "epoch": 0.66, "learning_rate": 1.0457684654102773e-07, "logits/chosen": -3.7337703704833984, "logits/rejected": -3.6179959774017334, "logps/chosen": -181.03802490234375, "logps/rejected": -199.02984619140625, "loss": 0.7119, "rewards/accuracies": 0.875, "rewards/chosen": -0.32055598497390747, "rewards/margins": 0.9162235856056213, "rewards/rejected": -1.2367795705795288, "step": 5696 }, { "epoch": 0.66, "learning_rate": 1.0454173007140347e-07, "logits/chosen": -3.6756057739257812, "logits/rejected": -3.4971208572387695, "logps/chosen": -206.87847900390625, "logps/rejected": -284.82073974609375, "loss": 0.3417, "rewards/accuracies": 0.875, "rewards/chosen": -0.45266881585121155, "rewards/margins": 3.3560338020324707, "rewards/rejected": -3.8087027072906494, "step": 5697 }, { "epoch": 0.66, "learning_rate": 1.0450661360177924e-07, "logits/chosen": -2.911289691925049, "logits/rejected": -3.0027456283569336, "logps/chosen": -320.0033874511719, "logps/rejected": -307.98486328125, "loss": 0.4677, "rewards/accuracies": 0.75, "rewards/chosen": 0.20468509197235107, "rewards/margins": 1.2846497297286987, "rewards/rejected": -1.0799646377563477, "step": 5698 }, { "epoch": 0.66, "learning_rate": 1.0447149713215498e-07, "logits/chosen": -3.840421676635742, "logits/rejected": -3.6942310333251953, "logps/chosen": -109.56204223632812, "logps/rejected": -93.37203979492188, "loss": 0.2908, "rewards/accuracies": 0.875, "rewards/chosen": 0.763930082321167, "rewards/margins": 1.803572654724121, "rewards/rejected": -1.039642572402954, "step": 5699 }, { "epoch": 0.66, "learning_rate": 1.0443638066253072e-07, "logits/chosen": -3.4806346893310547, "logits/rejected": -3.6597394943237305, "logps/chosen": -249.08120727539062, "logps/rejected": -237.81280517578125, "loss": 0.5392, "rewards/accuracies": 0.75, "rewards/chosen": -0.32755815982818604, "rewards/margins": 0.9470750093460083, "rewards/rejected": -1.2746331691741943, "step": 5700 }, { "epoch": 0.66, "learning_rate": 1.0440126419290646e-07, "logits/chosen": -3.540438652038574, "logits/rejected": -3.0810225009918213, "logps/chosen": -143.7138214111328, "logps/rejected": -236.2102508544922, "loss": 0.5851, "rewards/accuracies": 0.5, "rewards/chosen": -0.46429991722106934, "rewards/margins": 0.959042489528656, "rewards/rejected": -1.4233425855636597, "step": 5701 }, { "epoch": 0.66, "learning_rate": 1.0436614772328223e-07, "logits/chosen": -2.752253293991089, "logits/rejected": -2.399976968765259, "logps/chosen": -270.9918212890625, "logps/rejected": -289.14703369140625, "loss": 0.2854, "rewards/accuracies": 0.875, "rewards/chosen": 0.03849659487605095, "rewards/margins": 2.6767969131469727, "rewards/rejected": -2.6383001804351807, "step": 5702 }, { "epoch": 0.66, "learning_rate": 1.0433103125365797e-07, "logits/chosen": -3.2046937942504883, "logits/rejected": -3.295044422149658, "logps/chosen": -200.97665405273438, "logps/rejected": -226.44586181640625, "loss": 0.4062, "rewards/accuracies": 0.875, "rewards/chosen": 0.12999536097049713, "rewards/margins": 1.1068569421768188, "rewards/rejected": -0.9768615961074829, "step": 5703 }, { "epoch": 0.66, "learning_rate": 1.0429591478403371e-07, "logits/chosen": -2.9703588485717773, "logits/rejected": -3.274094343185425, "logps/chosen": -156.4149627685547, "logps/rejected": -224.1671142578125, "loss": 0.1751, "rewards/accuracies": 0.875, "rewards/chosen": -0.027678310871124268, "rewards/margins": 3.3839643001556396, "rewards/rejected": -3.411642551422119, "step": 5704 }, { "epoch": 0.66, "learning_rate": 1.0426079831440945e-07, "logits/chosen": -3.10577392578125, "logits/rejected": -3.619148015975952, "logps/chosen": -131.41033935546875, "logps/rejected": -298.48724365234375, "loss": 0.165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007128790020942688, "rewards/margins": 2.637416362762451, "rewards/rejected": -2.638129234313965, "step": 5705 }, { "epoch": 0.66, "learning_rate": 1.042256818447852e-07, "logits/chosen": -2.6301238536834717, "logits/rejected": -2.424525260925293, "logps/chosen": -158.76478576660156, "logps/rejected": -118.92388153076172, "loss": 0.6512, "rewards/accuracies": 0.375, "rewards/chosen": -0.5498378276824951, "rewards/margins": 0.1957525759935379, "rewards/rejected": -0.7455903887748718, "step": 5706 }, { "epoch": 0.66, "learning_rate": 1.0419056537516094e-07, "logits/chosen": -3.317917823791504, "logits/rejected": -3.3548166751861572, "logps/chosen": -269.99188232421875, "logps/rejected": -327.50982666015625, "loss": 1.0147, "rewards/accuracies": 0.5, "rewards/chosen": -1.3903298377990723, "rewards/margins": 0.8020012378692627, "rewards/rejected": -2.192330837249756, "step": 5707 }, { "epoch": 0.66, "learning_rate": 1.0415544890553668e-07, "logits/chosen": -2.92586612701416, "logits/rejected": -2.777235984802246, "logps/chosen": -215.6224365234375, "logps/rejected": -191.28253173828125, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": 0.4073292315006256, "rewards/margins": 1.915848731994629, "rewards/rejected": -1.5085195302963257, "step": 5708 }, { "epoch": 0.66, "learning_rate": 1.0412033243591244e-07, "logits/chosen": -3.2123873233795166, "logits/rejected": -3.033339262008667, "logps/chosen": -309.721923828125, "logps/rejected": -360.3021240234375, "loss": 0.1267, "rewards/accuracies": 0.875, "rewards/chosen": -0.015027925372123718, "rewards/margins": 4.467125415802002, "rewards/rejected": -4.482152938842773, "step": 5709 }, { "epoch": 0.66, "learning_rate": 1.0408521596628819e-07, "logits/chosen": -3.307359218597412, "logits/rejected": -3.4250526428222656, "logps/chosen": -224.22018432617188, "logps/rejected": -299.73651123046875, "loss": 0.5033, "rewards/accuracies": 0.75, "rewards/chosen": -0.806451141834259, "rewards/margins": 0.9775727391242981, "rewards/rejected": -1.7840238809585571, "step": 5710 }, { "epoch": 0.66, "learning_rate": 1.0405009949666393e-07, "logits/chosen": -2.572415828704834, "logits/rejected": -2.288235664367676, "logps/chosen": -390.760986328125, "logps/rejected": -326.1639099121094, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": 0.280534029006958, "rewards/margins": 1.3565744161605835, "rewards/rejected": -1.076040506362915, "step": 5711 }, { "epoch": 0.66, "learning_rate": 1.0401498302703967e-07, "logits/chosen": -3.079155445098877, "logits/rejected": -3.2752201557159424, "logps/chosen": -274.352294921875, "logps/rejected": -248.8653564453125, "loss": 0.339, "rewards/accuracies": 1.0, "rewards/chosen": -0.24281111359596252, "rewards/margins": 1.1365067958831787, "rewards/rejected": -1.3793179988861084, "step": 5712 }, { "epoch": 0.66, "learning_rate": 1.0397986655741541e-07, "logits/chosen": -3.3110251426696777, "logits/rejected": -2.9535210132598877, "logps/chosen": -249.51333618164062, "logps/rejected": -331.1379699707031, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": 0.5496994853019714, "rewards/margins": 2.435807943344116, "rewards/rejected": -1.8861083984375, "step": 5713 }, { "epoch": 0.66, "learning_rate": 1.0394475008779118e-07, "logits/chosen": -3.366668224334717, "logits/rejected": -3.3062572479248047, "logps/chosen": -343.701416015625, "logps/rejected": -550.7233276367188, "loss": 0.357, "rewards/accuracies": 0.875, "rewards/chosen": -0.25125181674957275, "rewards/margins": 2.227639675140381, "rewards/rejected": -2.478891372680664, "step": 5714 }, { "epoch": 0.66, "learning_rate": 1.0390963361816692e-07, "logits/chosen": -3.6016037464141846, "logits/rejected": -3.4645016193389893, "logps/chosen": -107.71371459960938, "logps/rejected": -210.82025146484375, "loss": 0.2657, "rewards/accuracies": 0.875, "rewards/chosen": 0.13403849303722382, "rewards/margins": 3.5827460289001465, "rewards/rejected": -3.4487078189849854, "step": 5715 }, { "epoch": 0.66, "learning_rate": 1.0387451714854266e-07, "logits/chosen": -2.790761947631836, "logits/rejected": -2.9193239212036133, "logps/chosen": -258.6531982421875, "logps/rejected": -279.71697998046875, "loss": 0.5587, "rewards/accuracies": 0.875, "rewards/chosen": -0.4381749629974365, "rewards/margins": 0.34310364723205566, "rewards/rejected": -0.7812786102294922, "step": 5716 }, { "epoch": 0.66, "learning_rate": 1.038394006789184e-07, "logits/chosen": -3.4202685356140137, "logits/rejected": -3.3117189407348633, "logps/chosen": -444.21319580078125, "logps/rejected": -345.298095703125, "loss": 0.2987, "rewards/accuracies": 0.875, "rewards/chosen": 0.19175538420677185, "rewards/margins": 1.8954626321792603, "rewards/rejected": -1.703707218170166, "step": 5717 }, { "epoch": 0.66, "learning_rate": 1.0380428420929414e-07, "logits/chosen": -3.545107841491699, "logits/rejected": -3.9487690925598145, "logps/chosen": -216.664306640625, "logps/rejected": -296.3105163574219, "loss": 0.4502, "rewards/accuracies": 0.75, "rewards/chosen": -0.7283449769020081, "rewards/margins": 1.1700036525726318, "rewards/rejected": -1.8983485698699951, "step": 5718 }, { "epoch": 0.66, "learning_rate": 1.0376916773966991e-07, "logits/chosen": -2.916353940963745, "logits/rejected": -2.8376336097717285, "logps/chosen": -313.30224609375, "logps/rejected": -294.0783996582031, "loss": 0.2634, "rewards/accuracies": 1.0, "rewards/chosen": 0.07945895195007324, "rewards/margins": 2.3460144996643066, "rewards/rejected": -2.2665555477142334, "step": 5719 }, { "epoch": 0.66, "learning_rate": 1.0373405127004565e-07, "logits/chosen": -3.2879676818847656, "logits/rejected": -3.1487197875976562, "logps/chosen": -329.46575927734375, "logps/rejected": -263.84033203125, "loss": 0.2686, "rewards/accuracies": 1.0, "rewards/chosen": 0.35222020745277405, "rewards/margins": 2.444688320159912, "rewards/rejected": -2.092468023300171, "step": 5720 }, { "epoch": 0.66, "learning_rate": 1.0369893480042139e-07, "logits/chosen": -2.9500763416290283, "logits/rejected": -2.991264581680298, "logps/chosen": -292.0182189941406, "logps/rejected": -346.2338562011719, "loss": 0.1991, "rewards/accuracies": 1.0, "rewards/chosen": 0.6156041026115417, "rewards/margins": 1.9652278423309326, "rewards/rejected": -1.349623680114746, "step": 5721 }, { "epoch": 0.66, "learning_rate": 1.0366381833079713e-07, "logits/chosen": -2.7860605716705322, "logits/rejected": -2.4905943870544434, "logps/chosen": -212.73974609375, "logps/rejected": -225.8954620361328, "loss": 0.3071, "rewards/accuracies": 1.0, "rewards/chosen": -0.24492380023002625, "rewards/margins": 1.484731674194336, "rewards/rejected": -1.7296556234359741, "step": 5722 }, { "epoch": 0.66, "learning_rate": 1.0362870186117289e-07, "logits/chosen": -3.043379306793213, "logits/rejected": -2.696592330932617, "logps/chosen": -283.6926574707031, "logps/rejected": -224.9112548828125, "loss": 0.4477, "rewards/accuracies": 0.75, "rewards/chosen": -0.66765296459198, "rewards/margins": 1.1680107116699219, "rewards/rejected": -1.8356637954711914, "step": 5723 }, { "epoch": 0.66, "learning_rate": 1.0359358539154863e-07, "logits/chosen": -2.4576287269592285, "logits/rejected": -2.4469122886657715, "logps/chosen": -262.71685791015625, "logps/rejected": -321.0290222167969, "loss": 0.371, "rewards/accuracies": 1.0, "rewards/chosen": -0.11369910836219788, "rewards/margins": 1.3149696588516235, "rewards/rejected": -1.428668737411499, "step": 5724 }, { "epoch": 0.66, "learning_rate": 1.0355846892192437e-07, "logits/chosen": -2.110001802444458, "logits/rejected": -2.3591601848602295, "logps/chosen": -270.73016357421875, "logps/rejected": -211.98812866210938, "loss": 0.3144, "rewards/accuracies": 0.875, "rewards/chosen": -0.1545550376176834, "rewards/margins": 1.9593926668167114, "rewards/rejected": -2.113947629928589, "step": 5725 }, { "epoch": 0.66, "learning_rate": 1.0352335245230012e-07, "logits/chosen": -2.622436285018921, "logits/rejected": -2.674302577972412, "logps/chosen": -103.29092407226562, "logps/rejected": -248.2713623046875, "loss": 0.2216, "rewards/accuracies": 1.0, "rewards/chosen": 0.6769444942474365, "rewards/margins": 2.1094658374786377, "rewards/rejected": -1.4325213432312012, "step": 5726 }, { "epoch": 0.66, "learning_rate": 1.0348823598267588e-07, "logits/chosen": -2.843531847000122, "logits/rejected": -2.6668548583984375, "logps/chosen": -278.90863037109375, "logps/rejected": -343.7578430175781, "loss": 0.7383, "rewards/accuracies": 0.625, "rewards/chosen": -0.2215561419725418, "rewards/margins": 0.4181056320667267, "rewards/rejected": -0.6396617889404297, "step": 5727 }, { "epoch": 0.66, "learning_rate": 1.0345311951305162e-07, "logits/chosen": -2.365753650665283, "logits/rejected": -2.570936441421509, "logps/chosen": -289.2744140625, "logps/rejected": -277.8552551269531, "loss": 0.3276, "rewards/accuracies": 0.75, "rewards/chosen": -0.05299463868141174, "rewards/margins": 1.836580753326416, "rewards/rejected": -1.8895753622055054, "step": 5728 }, { "epoch": 0.66, "learning_rate": 1.0341800304342736e-07, "logits/chosen": -3.182713031768799, "logits/rejected": -2.8020620346069336, "logps/chosen": -382.60546875, "logps/rejected": -385.7548522949219, "loss": 0.1858, "rewards/accuracies": 0.875, "rewards/chosen": 0.2580437660217285, "rewards/margins": 2.751758575439453, "rewards/rejected": -2.4937148094177246, "step": 5729 }, { "epoch": 0.66, "learning_rate": 1.033828865738031e-07, "logits/chosen": -3.3183655738830566, "logits/rejected": -3.046914577484131, "logps/chosen": -214.63174438476562, "logps/rejected": -295.0330810546875, "loss": 0.3521, "rewards/accuracies": 0.875, "rewards/chosen": -0.3338734209537506, "rewards/margins": 2.296858310699463, "rewards/rejected": -2.6307315826416016, "step": 5730 }, { "epoch": 0.66, "learning_rate": 1.0334777010417886e-07, "logits/chosen": -3.3060357570648193, "logits/rejected": -3.224677085876465, "logps/chosen": -366.2154541015625, "logps/rejected": -312.2091064453125, "loss": 0.4923, "rewards/accuracies": 0.625, "rewards/chosen": -0.46116968989372253, "rewards/margins": 1.7030208110809326, "rewards/rejected": -2.1641905307769775, "step": 5731 }, { "epoch": 0.66, "learning_rate": 1.033126536345546e-07, "logits/chosen": -3.242741584777832, "logits/rejected": -3.4385271072387695, "logps/chosen": -167.60096740722656, "logps/rejected": -212.5272674560547, "loss": 0.3071, "rewards/accuracies": 0.875, "rewards/chosen": 0.06767319142818451, "rewards/margins": 2.017521381378174, "rewards/rejected": -1.9498481750488281, "step": 5732 }, { "epoch": 0.66, "learning_rate": 1.0327753716493035e-07, "logits/chosen": -3.8079960346221924, "logits/rejected": -3.7925806045532227, "logps/chosen": -138.5676727294922, "logps/rejected": -171.40826416015625, "loss": 0.5716, "rewards/accuracies": 0.625, "rewards/chosen": -0.23291721940040588, "rewards/margins": 0.6927387118339539, "rewards/rejected": -0.9256559610366821, "step": 5733 }, { "epoch": 0.66, "learning_rate": 1.0324242069530609e-07, "logits/chosen": -2.6706252098083496, "logits/rejected": -2.9224562644958496, "logps/chosen": -383.078857421875, "logps/rejected": -248.27459716796875, "loss": 0.426, "rewards/accuracies": 0.75, "rewards/chosen": 0.1918918490409851, "rewards/margins": 1.5502405166625977, "rewards/rejected": -1.3583484888076782, "step": 5734 }, { "epoch": 0.66, "learning_rate": 1.0320730422568184e-07, "logits/chosen": -2.640475034713745, "logits/rejected": -2.404812812805176, "logps/chosen": -190.0913543701172, "logps/rejected": -200.3535614013672, "loss": 0.4559, "rewards/accuracies": 1.0, "rewards/chosen": -0.49830782413482666, "rewards/margins": 0.7523486018180847, "rewards/rejected": -1.2506563663482666, "step": 5735 }, { "epoch": 0.66, "learning_rate": 1.031721877560576e-07, "logits/chosen": -3.041409730911255, "logits/rejected": -3.019402027130127, "logps/chosen": -264.6996154785156, "logps/rejected": -371.77130126953125, "loss": 0.3476, "rewards/accuracies": 0.625, "rewards/chosen": 0.015005752444267273, "rewards/margins": 3.2795512676239014, "rewards/rejected": -3.26454496383667, "step": 5736 }, { "epoch": 0.66, "learning_rate": 1.0313707128643333e-07, "logits/chosen": -2.8524675369262695, "logits/rejected": -2.885733127593994, "logps/chosen": -387.8680419921875, "logps/rejected": -312.30218505859375, "loss": 0.5176, "rewards/accuracies": 0.75, "rewards/chosen": 0.44686976075172424, "rewards/margins": 1.2730282545089722, "rewards/rejected": -0.8261585235595703, "step": 5737 }, { "epoch": 0.66, "learning_rate": 1.0310195481680908e-07, "logits/chosen": -2.345773935317993, "logits/rejected": -2.759247303009033, "logps/chosen": -321.7190246582031, "logps/rejected": -215.51544189453125, "loss": 0.1919, "rewards/accuracies": 0.875, "rewards/chosen": 0.06312372535467148, "rewards/margins": 2.3416717052459717, "rewards/rejected": -2.278548002243042, "step": 5738 }, { "epoch": 0.66, "learning_rate": 1.0306683834718483e-07, "logits/chosen": -3.4460511207580566, "logits/rejected": -3.118619680404663, "logps/chosen": -286.6305847167969, "logps/rejected": -173.76528930664062, "loss": 0.3881, "rewards/accuracies": 0.875, "rewards/chosen": -0.5135477185249329, "rewards/margins": 1.3169912099838257, "rewards/rejected": -1.8305387496948242, "step": 5739 }, { "epoch": 0.66, "learning_rate": 1.0303172187756057e-07, "logits/chosen": -3.0794808864593506, "logits/rejected": -2.8718647956848145, "logps/chosen": -271.12091064453125, "logps/rejected": -255.77081298828125, "loss": 0.2727, "rewards/accuracies": 0.875, "rewards/chosen": 0.26195651292800903, "rewards/margins": 2.050210475921631, "rewards/rejected": -1.7882541418075562, "step": 5740 }, { "epoch": 0.66, "learning_rate": 1.0299660540793631e-07, "logits/chosen": -3.616921901702881, "logits/rejected": -3.5958313941955566, "logps/chosen": -240.07421875, "logps/rejected": -308.73089599609375, "loss": 0.3026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3943845331668854, "rewards/margins": 1.7851823568344116, "rewards/rejected": -2.1795668601989746, "step": 5741 }, { "epoch": 0.66, "learning_rate": 1.0296148893831205e-07, "logits/chosen": -3.210824489593506, "logits/rejected": -2.970659017562866, "logps/chosen": -419.97796630859375, "logps/rejected": -358.19085693359375, "loss": 0.9998, "rewards/accuracies": 0.75, "rewards/chosen": -0.6633117198944092, "rewards/margins": 0.7042019963264465, "rewards/rejected": -1.367513656616211, "step": 5742 }, { "epoch": 0.66, "learning_rate": 1.0292637246868782e-07, "logits/chosen": -2.979198694229126, "logits/rejected": -2.5398201942443848, "logps/chosen": -293.94439697265625, "logps/rejected": -277.1271667480469, "loss": 0.3335, "rewards/accuracies": 0.75, "rewards/chosen": -0.5395147204399109, "rewards/margins": 1.6915919780731201, "rewards/rejected": -2.231106758117676, "step": 5743 }, { "epoch": 0.66, "learning_rate": 1.0289125599906356e-07, "logits/chosen": -3.6044628620147705, "logits/rejected": -3.719559669494629, "logps/chosen": -289.176025390625, "logps/rejected": -311.2850036621094, "loss": 0.5243, "rewards/accuracies": 0.75, "rewards/chosen": -0.058010444045066833, "rewards/margins": 0.541130006313324, "rewards/rejected": -0.5991405248641968, "step": 5744 }, { "epoch": 0.66, "learning_rate": 1.028561395294393e-07, "logits/chosen": -2.6133618354797363, "logits/rejected": -2.87786602973938, "logps/chosen": -216.2079315185547, "logps/rejected": -193.7664337158203, "loss": 0.5019, "rewards/accuracies": 0.75, "rewards/chosen": 0.05857527628540993, "rewards/margins": 1.7002768516540527, "rewards/rejected": -1.6417016983032227, "step": 5745 }, { "epoch": 0.66, "learning_rate": 1.0282102305981504e-07, "logits/chosen": -3.280165195465088, "logits/rejected": -3.311551332473755, "logps/chosen": -190.90087890625, "logps/rejected": -228.99082946777344, "loss": 0.2795, "rewards/accuracies": 0.875, "rewards/chosen": 0.09299305081367493, "rewards/margins": 1.8984345197677612, "rewards/rejected": -1.8054416179656982, "step": 5746 }, { "epoch": 0.66, "learning_rate": 1.0278590659019081e-07, "logits/chosen": -2.6901144981384277, "logits/rejected": -2.9623804092407227, "logps/chosen": -314.64324951171875, "logps/rejected": -197.41644287109375, "loss": 0.4093, "rewards/accuracies": 0.75, "rewards/chosen": 0.12403127551078796, "rewards/margins": 2.2017598152160645, "rewards/rejected": -2.077728509902954, "step": 5747 }, { "epoch": 0.66, "learning_rate": 1.0275079012056655e-07, "logits/chosen": -2.7973833084106445, "logits/rejected": -3.1017847061157227, "logps/chosen": -255.87982177734375, "logps/rejected": -200.33206176757812, "loss": 0.1903, "rewards/accuracies": 1.0, "rewards/chosen": 0.39685922861099243, "rewards/margins": 2.0026543140411377, "rewards/rejected": -1.6057950258255005, "step": 5748 }, { "epoch": 0.66, "learning_rate": 1.0271567365094229e-07, "logits/chosen": -2.4292118549346924, "logits/rejected": -2.435119867324829, "logps/chosen": -325.36627197265625, "logps/rejected": -277.3565368652344, "loss": 0.3013, "rewards/accuracies": 0.875, "rewards/chosen": -0.29526379704475403, "rewards/margins": 1.527868390083313, "rewards/rejected": -1.8231322765350342, "step": 5749 }, { "epoch": 0.66, "learning_rate": 1.0268055718131803e-07, "logits/chosen": -3.1930861473083496, "logits/rejected": -3.105412006378174, "logps/chosen": -158.46270751953125, "logps/rejected": -219.99899291992188, "loss": 0.8084, "rewards/accuracies": 0.625, "rewards/chosen": -0.3112417757511139, "rewards/margins": 0.7355090379714966, "rewards/rejected": -1.046750783920288, "step": 5750 }, { "epoch": 0.66, "learning_rate": 1.0264544071169378e-07, "logits/chosen": -2.847608804702759, "logits/rejected": -2.8641858100891113, "logps/chosen": -241.14773559570312, "logps/rejected": -227.56137084960938, "loss": 0.4569, "rewards/accuracies": 0.875, "rewards/chosen": -0.14421606063842773, "rewards/margins": 1.8780659437179565, "rewards/rejected": -2.0222818851470947, "step": 5751 }, { "epoch": 0.66, "learning_rate": 1.0261032424206952e-07, "logits/chosen": -3.65132999420166, "logits/rejected": -3.207498073577881, "logps/chosen": -269.8680725097656, "logps/rejected": -225.82614135742188, "loss": 0.3324, "rewards/accuracies": 1.0, "rewards/chosen": -0.14805744588375092, "rewards/margins": 1.2387300729751587, "rewards/rejected": -1.3867875337600708, "step": 5752 }, { "epoch": 0.66, "learning_rate": 1.0257520777244528e-07, "logits/chosen": -2.9970133304595947, "logits/rejected": -3.2680580615997314, "logps/chosen": -279.75506591796875, "logps/rejected": -259.4080810546875, "loss": 0.3932, "rewards/accuracies": 0.75, "rewards/chosen": 0.7207176089286804, "rewards/margins": 1.026503324508667, "rewards/rejected": -0.3057858347892761, "step": 5753 }, { "epoch": 0.66, "learning_rate": 1.0254009130282102e-07, "logits/chosen": -3.44746994972229, "logits/rejected": -3.162877321243286, "logps/chosen": -197.7921142578125, "logps/rejected": -163.2577667236328, "loss": 0.3743, "rewards/accuracies": 0.875, "rewards/chosen": 0.02987562119960785, "rewards/margins": 1.3482426404953003, "rewards/rejected": -1.3183670043945312, "step": 5754 }, { "epoch": 0.66, "learning_rate": 1.0250497483319677e-07, "logits/chosen": -2.5681235790252686, "logits/rejected": -2.328019380569458, "logps/chosen": -297.33062744140625, "logps/rejected": -267.73529052734375, "loss": 0.3946, "rewards/accuracies": 0.75, "rewards/chosen": 0.03810843825340271, "rewards/margins": 1.5640288591384888, "rewards/rejected": -1.5259203910827637, "step": 5755 }, { "epoch": 0.66, "learning_rate": 1.0246985836357251e-07, "logits/chosen": -3.223646402359009, "logits/rejected": -3.1563305854797363, "logps/chosen": -416.85455322265625, "logps/rejected": -366.9803161621094, "loss": 0.2251, "rewards/accuracies": 1.0, "rewards/chosen": 0.29481378197669983, "rewards/margins": 2.5134780406951904, "rewards/rejected": -2.2186641693115234, "step": 5756 }, { "epoch": 0.66, "learning_rate": 1.0243474189394825e-07, "logits/chosen": -3.3645846843719482, "logits/rejected": -3.0482821464538574, "logps/chosen": -250.76380920410156, "logps/rejected": -205.61764526367188, "loss": 0.5448, "rewards/accuracies": 0.75, "rewards/chosen": -0.0019525587558746338, "rewards/margins": 0.9233155250549316, "rewards/rejected": -0.9252680540084839, "step": 5757 }, { "epoch": 0.66, "learning_rate": 1.02399625424324e-07, "logits/chosen": -3.7089920043945312, "logits/rejected": -3.4363558292388916, "logps/chosen": -211.56361389160156, "logps/rejected": -268.3355712890625, "loss": 0.4072, "rewards/accuracies": 0.875, "rewards/chosen": -0.9486469030380249, "rewards/margins": 2.0298609733581543, "rewards/rejected": -2.9785079956054688, "step": 5758 }, { "epoch": 0.66, "learning_rate": 1.0236450895469976e-07, "logits/chosen": -3.1310739517211914, "logits/rejected": -2.936893939971924, "logps/chosen": -269.208740234375, "logps/rejected": -186.3160858154297, "loss": 0.6258, "rewards/accuracies": 0.75, "rewards/chosen": -0.13150577247142792, "rewards/margins": 0.5692039132118225, "rewards/rejected": -0.7007097005844116, "step": 5759 }, { "epoch": 0.66, "learning_rate": 1.023293924850755e-07, "logits/chosen": -3.5869882106781006, "logits/rejected": -3.253415107727051, "logps/chosen": -326.01904296875, "logps/rejected": -340.59417724609375, "loss": 0.5657, "rewards/accuracies": 0.875, "rewards/chosen": -0.8326348066329956, "rewards/margins": 2.3703763484954834, "rewards/rejected": -3.2030110359191895, "step": 5760 }, { "epoch": 0.66, "learning_rate": 1.0229427601545124e-07, "logits/chosen": -2.6967151165008545, "logits/rejected": -2.81656551361084, "logps/chosen": -316.7264709472656, "logps/rejected": -256.78936767578125, "loss": 0.2689, "rewards/accuracies": 0.875, "rewards/chosen": 0.03522692248225212, "rewards/margins": 1.8587560653686523, "rewards/rejected": -1.8235292434692383, "step": 5761 }, { "epoch": 0.66, "learning_rate": 1.0225915954582698e-07, "logits/chosen": -2.6511945724487305, "logits/rejected": -2.6449978351593018, "logps/chosen": -281.9930725097656, "logps/rejected": -209.66607666015625, "loss": 0.5767, "rewards/accuracies": 0.625, "rewards/chosen": -0.04240059107542038, "rewards/margins": 0.6381044387817383, "rewards/rejected": -0.6805050373077393, "step": 5762 }, { "epoch": 0.66, "learning_rate": 1.0222404307620272e-07, "logits/chosen": -3.1640877723693848, "logits/rejected": -2.7555370330810547, "logps/chosen": -438.7602233886719, "logps/rejected": -346.1592102050781, "loss": 0.3816, "rewards/accuracies": 0.75, "rewards/chosen": -0.01563824713230133, "rewards/margins": 1.8843624591827393, "rewards/rejected": -1.900000810623169, "step": 5763 }, { "epoch": 0.66, "learning_rate": 1.0218892660657849e-07, "logits/chosen": -2.4433419704437256, "logits/rejected": -2.7226004600524902, "logps/chosen": -170.5955810546875, "logps/rejected": -267.266357421875, "loss": 0.6218, "rewards/accuracies": 0.625, "rewards/chosen": -0.9170100092887878, "rewards/margins": 1.0372779369354248, "rewards/rejected": -1.9542880058288574, "step": 5764 }, { "epoch": 0.66, "learning_rate": 1.0215381013695423e-07, "logits/chosen": -2.6751208305358887, "logits/rejected": -2.867249011993408, "logps/chosen": -154.9772186279297, "logps/rejected": -199.23590087890625, "loss": 1.0693, "rewards/accuracies": 0.5, "rewards/chosen": -1.2327595949172974, "rewards/margins": -0.4381016492843628, "rewards/rejected": -0.7946579456329346, "step": 5765 }, { "epoch": 0.66, "learning_rate": 1.0211869366732997e-07, "logits/chosen": -4.129702568054199, "logits/rejected": -3.8299977779388428, "logps/chosen": -363.24261474609375, "logps/rejected": -212.00680541992188, "loss": 0.2667, "rewards/accuracies": 0.875, "rewards/chosen": -0.21536211669445038, "rewards/margins": 1.8848932981491089, "rewards/rejected": -2.100255250930786, "step": 5766 }, { "epoch": 0.66, "learning_rate": 1.0208357719770571e-07, "logits/chosen": -2.9163601398468018, "logits/rejected": -2.844752550125122, "logps/chosen": -247.35093688964844, "logps/rejected": -271.97711181640625, "loss": 0.197, "rewards/accuracies": 1.0, "rewards/chosen": -0.115823894739151, "rewards/margins": 2.4005630016326904, "rewards/rejected": -2.5163869857788086, "step": 5767 }, { "epoch": 0.66, "learning_rate": 1.0204846072808147e-07, "logits/chosen": -1.7753016948699951, "logits/rejected": -2.1553795337677, "logps/chosen": -428.62896728515625, "logps/rejected": -381.05859375, "loss": 0.6075, "rewards/accuracies": 0.75, "rewards/chosen": 0.516228973865509, "rewards/margins": 1.1322059631347656, "rewards/rejected": -0.6159769296646118, "step": 5768 }, { "epoch": 0.67, "learning_rate": 1.0201334425845721e-07, "logits/chosen": -2.284559726715088, "logits/rejected": -2.2727506160736084, "logps/chosen": -301.8162841796875, "logps/rejected": -253.64382934570312, "loss": 1.1537, "rewards/accuracies": 0.625, "rewards/chosen": -0.6056938767433167, "rewards/margins": 0.03116026520729065, "rewards/rejected": -0.6368541121482849, "step": 5769 }, { "epoch": 0.67, "learning_rate": 1.0197822778883296e-07, "logits/chosen": -3.067047595977783, "logits/rejected": -3.5173614025115967, "logps/chosen": -176.16964721679688, "logps/rejected": -190.1708526611328, "loss": 0.4539, "rewards/accuracies": 0.875, "rewards/chosen": -0.0789172500371933, "rewards/margins": 2.098031520843506, "rewards/rejected": -2.1769487857818604, "step": 5770 }, { "epoch": 0.67, "learning_rate": 1.019431113192087e-07, "logits/chosen": -3.1975488662719727, "logits/rejected": -3.459874153137207, "logps/chosen": -113.69181823730469, "logps/rejected": -200.95675659179688, "loss": 0.2564, "rewards/accuracies": 0.875, "rewards/chosen": 0.33539271354675293, "rewards/margins": 2.6684658527374268, "rewards/rejected": -2.333073139190674, "step": 5771 }, { "epoch": 0.67, "learning_rate": 1.0190799484958446e-07, "logits/chosen": -2.847388505935669, "logits/rejected": -2.923521041870117, "logps/chosen": -150.17897033691406, "logps/rejected": -170.14898681640625, "loss": 0.4886, "rewards/accuracies": 0.625, "rewards/chosen": 0.09042772650718689, "rewards/margins": 1.5247735977172852, "rewards/rejected": -1.4343459606170654, "step": 5772 }, { "epoch": 0.67, "learning_rate": 1.018728783799602e-07, "logits/chosen": -3.1550066471099854, "logits/rejected": -3.094484806060791, "logps/chosen": -209.30865478515625, "logps/rejected": -138.1820068359375, "loss": 0.3245, "rewards/accuracies": 0.875, "rewards/chosen": -0.10885527729988098, "rewards/margins": 1.2718846797943115, "rewards/rejected": -1.3807399272918701, "step": 5773 }, { "epoch": 0.67, "learning_rate": 1.0183776191033594e-07, "logits/chosen": -3.1068968772888184, "logits/rejected": -3.2536115646362305, "logps/chosen": -129.399169921875, "logps/rejected": -144.71841430664062, "loss": 0.4908, "rewards/accuracies": 0.875, "rewards/chosen": -0.03554859757423401, "rewards/margins": 1.4550303220748901, "rewards/rejected": -1.4905788898468018, "step": 5774 }, { "epoch": 0.67, "learning_rate": 1.0180264544071168e-07, "logits/chosen": -3.316539764404297, "logits/rejected": -3.354823589324951, "logps/chosen": -164.30441284179688, "logps/rejected": -258.5980224609375, "loss": 0.3985, "rewards/accuracies": 0.875, "rewards/chosen": -0.26064959168434143, "rewards/margins": 1.0162330865859985, "rewards/rejected": -1.2768826484680176, "step": 5775 }, { "epoch": 0.67, "learning_rate": 1.0176752897108744e-07, "logits/chosen": -3.994906425476074, "logits/rejected": -4.15238618850708, "logps/chosen": -226.6694793701172, "logps/rejected": -328.47900390625, "loss": 0.4483, "rewards/accuracies": 0.75, "rewards/chosen": 0.10846003144979477, "rewards/margins": 1.8222503662109375, "rewards/rejected": -1.7137904167175293, "step": 5776 }, { "epoch": 0.67, "learning_rate": 1.0173241250146318e-07, "logits/chosen": -3.3136508464813232, "logits/rejected": -3.368170976638794, "logps/chosen": -262.1461181640625, "logps/rejected": -326.2315673828125, "loss": 0.3512, "rewards/accuracies": 0.75, "rewards/chosen": -0.2825905680656433, "rewards/margins": 2.5727834701538086, "rewards/rejected": -2.855374336242676, "step": 5777 }, { "epoch": 0.67, "learning_rate": 1.0169729603183893e-07, "logits/chosen": -3.6554059982299805, "logits/rejected": -3.600076675415039, "logps/chosen": -392.1745300292969, "logps/rejected": -472.44342041015625, "loss": 0.3589, "rewards/accuracies": 0.875, "rewards/chosen": -0.7090254426002502, "rewards/margins": 2.098874092102051, "rewards/rejected": -2.8078997135162354, "step": 5778 }, { "epoch": 0.67, "learning_rate": 1.0166217956221467e-07, "logits/chosen": -3.3757266998291016, "logits/rejected": -3.0989155769348145, "logps/chosen": -266.2609558105469, "logps/rejected": -206.15481567382812, "loss": 0.6698, "rewards/accuracies": 0.5, "rewards/chosen": -0.17097771167755127, "rewards/margins": 0.9181965589523315, "rewards/rejected": -1.0891743898391724, "step": 5779 }, { "epoch": 0.67, "learning_rate": 1.0162706309259042e-07, "logits/chosen": -2.482785701751709, "logits/rejected": -2.464113235473633, "logps/chosen": -479.3082275390625, "logps/rejected": -307.0700378417969, "loss": 0.3014, "rewards/accuracies": 0.875, "rewards/chosen": 0.3413582146167755, "rewards/margins": 1.5213143825531006, "rewards/rejected": -1.1799561977386475, "step": 5780 }, { "epoch": 0.67, "learning_rate": 1.0159194662296617e-07, "logits/chosen": -3.010310649871826, "logits/rejected": -2.9848668575286865, "logps/chosen": -378.9236755371094, "logps/rejected": -376.0784912109375, "loss": 0.4985, "rewards/accuracies": 0.75, "rewards/chosen": -0.055248845368623734, "rewards/margins": 1.4566857814788818, "rewards/rejected": -1.5119346380233765, "step": 5781 }, { "epoch": 0.67, "learning_rate": 1.0155683015334191e-07, "logits/chosen": -3.836245059967041, "logits/rejected": -3.853060245513916, "logps/chosen": -178.43960571289062, "logps/rejected": -184.9104461669922, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": 0.21229977905750275, "rewards/margins": 2.320981979370117, "rewards/rejected": -2.108682155609131, "step": 5782 }, { "epoch": 0.67, "learning_rate": 1.0152171368371765e-07, "logits/chosen": -3.346668243408203, "logits/rejected": -3.21187162399292, "logps/chosen": -113.52845764160156, "logps/rejected": -176.13787841796875, "loss": 0.3956, "rewards/accuracies": 0.875, "rewards/chosen": 0.26484766602516174, "rewards/margins": 1.6654938459396362, "rewards/rejected": -1.4006460905075073, "step": 5783 }, { "epoch": 0.67, "learning_rate": 1.0148659721409341e-07, "logits/chosen": -2.703242301940918, "logits/rejected": -2.7914578914642334, "logps/chosen": -253.92556762695312, "logps/rejected": -263.5450439453125, "loss": 0.3074, "rewards/accuracies": 0.875, "rewards/chosen": 0.2728787064552307, "rewards/margins": 1.9885873794555664, "rewards/rejected": -1.7157087326049805, "step": 5784 }, { "epoch": 0.67, "learning_rate": 1.0145148074446915e-07, "logits/chosen": -3.3134214878082275, "logits/rejected": -3.026581287384033, "logps/chosen": -169.9081573486328, "logps/rejected": -152.12149047851562, "loss": 0.4661, "rewards/accuracies": 0.875, "rewards/chosen": -0.5954524874687195, "rewards/margins": 0.9023603200912476, "rewards/rejected": -1.4978127479553223, "step": 5785 }, { "epoch": 0.67, "learning_rate": 1.0141636427484489e-07, "logits/chosen": -2.6906309127807617, "logits/rejected": -2.81508731842041, "logps/chosen": -269.5164794921875, "logps/rejected": -344.8387756347656, "loss": 0.2401, "rewards/accuracies": 1.0, "rewards/chosen": 0.07378996908664703, "rewards/margins": 2.0490143299102783, "rewards/rejected": -1.975224256515503, "step": 5786 }, { "epoch": 0.67, "learning_rate": 1.0138124780522064e-07, "logits/chosen": -3.3714749813079834, "logits/rejected": -3.3474299907684326, "logps/chosen": -175.0192108154297, "logps/rejected": -184.05038452148438, "loss": 0.5079, "rewards/accuracies": 0.625, "rewards/chosen": -0.38579031825065613, "rewards/margins": 1.7025578022003174, "rewards/rejected": -2.088348150253296, "step": 5787 }, { "epoch": 0.67, "learning_rate": 1.013461313355964e-07, "logits/chosen": -2.9011149406433105, "logits/rejected": -2.7671215534210205, "logps/chosen": -434.40509033203125, "logps/rejected": -357.1767578125, "loss": 0.4471, "rewards/accuracies": 0.75, "rewards/chosen": 0.033111900091171265, "rewards/margins": 1.1273092031478882, "rewards/rejected": -1.0941972732543945, "step": 5788 }, { "epoch": 0.67, "learning_rate": 1.0131101486597214e-07, "logits/chosen": -3.8368546962738037, "logits/rejected": -3.303015947341919, "logps/chosen": -274.2537536621094, "logps/rejected": -195.52362060546875, "loss": 0.3825, "rewards/accuracies": 0.875, "rewards/chosen": -0.5419259071350098, "rewards/margins": 1.8584747314453125, "rewards/rejected": -2.4004006385803223, "step": 5789 }, { "epoch": 0.67, "learning_rate": 1.0127589839634788e-07, "logits/chosen": -3.756796360015869, "logits/rejected": -3.6050214767456055, "logps/chosen": -202.0507354736328, "logps/rejected": -193.2487335205078, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": 0.2241852879524231, "rewards/margins": 2.0515527725219727, "rewards/rejected": -1.8273674249649048, "step": 5790 }, { "epoch": 0.67, "learning_rate": 1.0124078192672362e-07, "logits/chosen": -3.883694648742676, "logits/rejected": -3.527913808822632, "logps/chosen": -294.76654052734375, "logps/rejected": -195.85238647460938, "loss": 0.3692, "rewards/accuracies": 0.875, "rewards/chosen": -0.24595633149147034, "rewards/margins": 1.5026288032531738, "rewards/rejected": -1.7485851049423218, "step": 5791 }, { "epoch": 0.67, "learning_rate": 1.0120566545709939e-07, "logits/chosen": -2.7484538555145264, "logits/rejected": -2.834972858428955, "logps/chosen": -379.41717529296875, "logps/rejected": -254.1068878173828, "loss": 0.7085, "rewards/accuracies": 0.75, "rewards/chosen": -0.1076100766658783, "rewards/margins": 1.8199703693389893, "rewards/rejected": -1.9275805950164795, "step": 5792 }, { "epoch": 0.67, "learning_rate": 1.0117054898747513e-07, "logits/chosen": -2.676919937133789, "logits/rejected": -2.620404005050659, "logps/chosen": -400.7017517089844, "logps/rejected": -336.02569580078125, "loss": 0.1799, "rewards/accuracies": 0.875, "rewards/chosen": 0.5211951732635498, "rewards/margins": 3.4666409492492676, "rewards/rejected": -2.9454457759857178, "step": 5793 }, { "epoch": 0.67, "learning_rate": 1.0113543251785087e-07, "logits/chosen": -2.9586222171783447, "logits/rejected": -3.1434831619262695, "logps/chosen": -177.74632263183594, "logps/rejected": -176.98435974121094, "loss": 0.3928, "rewards/accuracies": 0.875, "rewards/chosen": -0.003329724073410034, "rewards/margins": 1.0225564241409302, "rewards/rejected": -1.025886058807373, "step": 5794 }, { "epoch": 0.67, "learning_rate": 1.0110031604822661e-07, "logits/chosen": -3.4130098819732666, "logits/rejected": -3.1802778244018555, "logps/chosen": -303.45318603515625, "logps/rejected": -250.43319702148438, "loss": 0.2006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031204447150230408, "rewards/margins": 2.5881195068359375, "rewards/rejected": -2.5912399291992188, "step": 5795 }, { "epoch": 0.67, "learning_rate": 1.0106519957860236e-07, "logits/chosen": -2.7161779403686523, "logits/rejected": -2.7206342220306396, "logps/chosen": -333.5968017578125, "logps/rejected": -363.04791259765625, "loss": 0.4413, "rewards/accuracies": 0.75, "rewards/chosen": -0.08945446461439133, "rewards/margins": 1.795944333076477, "rewards/rejected": -1.8853987455368042, "step": 5796 }, { "epoch": 0.67, "learning_rate": 1.010300831089781e-07, "logits/chosen": -3.0436596870422363, "logits/rejected": -2.7899258136749268, "logps/chosen": -246.21946716308594, "logps/rejected": -312.4344482421875, "loss": 0.5526, "rewards/accuracies": 0.75, "rewards/chosen": 0.3712711036205292, "rewards/margins": 2.3040151596069336, "rewards/rejected": -1.9327441453933716, "step": 5797 }, { "epoch": 0.67, "learning_rate": 1.0099496663935386e-07, "logits/chosen": -3.3282346725463867, "logits/rejected": -3.342134475708008, "logps/chosen": -229.2913818359375, "logps/rejected": -163.58731079101562, "loss": 0.2578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3030242919921875, "rewards/margins": 1.673163890838623, "rewards/rejected": -1.370139718055725, "step": 5798 }, { "epoch": 0.67, "learning_rate": 1.009598501697296e-07, "logits/chosen": -2.896559476852417, "logits/rejected": -2.893857002258301, "logps/chosen": -312.3309326171875, "logps/rejected": -307.76947021484375, "loss": 0.3169, "rewards/accuracies": 0.875, "rewards/chosen": -0.14569054543972015, "rewards/margins": 2.316917896270752, "rewards/rejected": -2.462608575820923, "step": 5799 }, { "epoch": 0.67, "learning_rate": 1.0092473370010535e-07, "logits/chosen": -3.0085935592651367, "logits/rejected": -2.9898650646209717, "logps/chosen": -324.7966613769531, "logps/rejected": -336.0631103515625, "loss": 0.5702, "rewards/accuracies": 0.75, "rewards/chosen": -1.1573454141616821, "rewards/margins": 0.8149951100349426, "rewards/rejected": -1.9723405838012695, "step": 5800 }, { "epoch": 0.67, "learning_rate": 1.0088961723048109e-07, "logits/chosen": -2.9146485328674316, "logits/rejected": -2.890150308609009, "logps/chosen": -235.1514892578125, "logps/rejected": -434.43377685546875, "loss": 0.3705, "rewards/accuracies": 0.875, "rewards/chosen": 0.07833784818649292, "rewards/margins": 1.8482413291931152, "rewards/rejected": -1.769903540611267, "step": 5801 }, { "epoch": 0.67, "learning_rate": 1.0085450076085683e-07, "logits/chosen": -3.4724907875061035, "logits/rejected": -3.607898473739624, "logps/chosen": -204.73153686523438, "logps/rejected": -247.36355590820312, "loss": 0.2157, "rewards/accuracies": 0.875, "rewards/chosen": -0.05382850766181946, "rewards/margins": 2.374692916870117, "rewards/rejected": -2.428521156311035, "step": 5802 }, { "epoch": 0.67, "learning_rate": 1.0081938429123257e-07, "logits/chosen": -3.213408946990967, "logits/rejected": -3.3134405612945557, "logps/chosen": -309.4417724609375, "logps/rejected": -383.5044250488281, "loss": 0.2582, "rewards/accuracies": 0.875, "rewards/chosen": 0.5420499444007874, "rewards/margins": 2.840010166168213, "rewards/rejected": -2.2979602813720703, "step": 5803 }, { "epoch": 0.67, "learning_rate": 1.0078426782160834e-07, "logits/chosen": -2.6683239936828613, "logits/rejected": -2.630152463912964, "logps/chosen": -331.8743896484375, "logps/rejected": -396.02044677734375, "loss": 0.5073, "rewards/accuracies": 0.875, "rewards/chosen": -0.10291262716054916, "rewards/margins": 1.9289417266845703, "rewards/rejected": -2.0318543910980225, "step": 5804 }, { "epoch": 0.67, "learning_rate": 1.0074915135198408e-07, "logits/chosen": -3.5045104026794434, "logits/rejected": -3.447598695755005, "logps/chosen": -286.88800048828125, "logps/rejected": -204.11068725585938, "loss": 0.5644, "rewards/accuracies": 0.625, "rewards/chosen": -0.7152926325798035, "rewards/margins": 0.641181230545044, "rewards/rejected": -1.3564739227294922, "step": 5805 }, { "epoch": 0.67, "learning_rate": 1.0071403488235982e-07, "logits/chosen": -3.722761631011963, "logits/rejected": -3.300753355026245, "logps/chosen": -176.16175842285156, "logps/rejected": -153.8076171875, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -0.6921444535255432, "rewards/margins": 1.6760385036468506, "rewards/rejected": -2.368182897567749, "step": 5806 }, { "epoch": 0.67, "learning_rate": 1.0067891841273556e-07, "logits/chosen": -3.275182008743286, "logits/rejected": -3.2051639556884766, "logps/chosen": -378.00909423828125, "logps/rejected": -288.52630615234375, "loss": 0.3137, "rewards/accuracies": 0.75, "rewards/chosen": -0.5098221898078918, "rewards/margins": 2.122068405151367, "rewards/rejected": -2.6318905353546143, "step": 5807 }, { "epoch": 0.67, "learning_rate": 1.0064380194311133e-07, "logits/chosen": -3.3149807453155518, "logits/rejected": -3.4779679775238037, "logps/chosen": -187.40206909179688, "logps/rejected": -222.67373657226562, "loss": 0.7981, "rewards/accuracies": 0.625, "rewards/chosen": -0.7969968318939209, "rewards/margins": 0.6386675238609314, "rewards/rejected": -1.4356642961502075, "step": 5808 }, { "epoch": 0.67, "learning_rate": 1.0060868547348707e-07, "logits/chosen": -2.9451088905334473, "logits/rejected": -3.122645378112793, "logps/chosen": -316.54620361328125, "logps/rejected": -229.63111877441406, "loss": 0.3251, "rewards/accuracies": 0.75, "rewards/chosen": 0.2457353174686432, "rewards/margins": 2.0575978755950928, "rewards/rejected": -1.811862587928772, "step": 5809 }, { "epoch": 0.67, "learning_rate": 1.0057356900386281e-07, "logits/chosen": -2.496589422225952, "logits/rejected": -2.5081653594970703, "logps/chosen": -238.273193359375, "logps/rejected": -157.47410583496094, "loss": 0.7627, "rewards/accuracies": 0.5, "rewards/chosen": -0.49845245480537415, "rewards/margins": 0.20113077759742737, "rewards/rejected": -0.6995831727981567, "step": 5810 }, { "epoch": 0.67, "learning_rate": 1.0053845253423855e-07, "logits/chosen": -3.516080379486084, "logits/rejected": -3.7569658756256104, "logps/chosen": -322.4468078613281, "logps/rejected": -329.191650390625, "loss": 0.2489, "rewards/accuracies": 0.875, "rewards/chosen": 0.5715285539627075, "rewards/margins": 2.620281457901001, "rewards/rejected": -2.048752784729004, "step": 5811 }, { "epoch": 0.67, "learning_rate": 1.0050333606461429e-07, "logits/chosen": -3.1130435466766357, "logits/rejected": -3.6663031578063965, "logps/chosen": -202.66000366210938, "logps/rejected": -224.72091674804688, "loss": 0.1538, "rewards/accuracies": 1.0, "rewards/chosen": 0.6141217947006226, "rewards/margins": 3.2038567066192627, "rewards/rejected": -2.5897347927093506, "step": 5812 }, { "epoch": 0.67, "learning_rate": 1.0046821959499005e-07, "logits/chosen": -3.4487826824188232, "logits/rejected": -3.2191720008850098, "logps/chosen": -283.79180908203125, "logps/rejected": -156.31439208984375, "loss": 0.3469, "rewards/accuracies": 0.875, "rewards/chosen": -0.12212395668029785, "rewards/margins": 1.3463075160980225, "rewards/rejected": -1.4684314727783203, "step": 5813 }, { "epoch": 0.67, "learning_rate": 1.0043310312536579e-07, "logits/chosen": -3.839623212814331, "logits/rejected": -3.9483420848846436, "logps/chosen": -172.6593017578125, "logps/rejected": -189.46749877929688, "loss": 0.4066, "rewards/accuracies": 0.875, "rewards/chosen": -0.2584480345249176, "rewards/margins": 1.363820195198059, "rewards/rejected": -1.6222681999206543, "step": 5814 }, { "epoch": 0.67, "learning_rate": 1.0039798665574154e-07, "logits/chosen": -3.5140485763549805, "logits/rejected": -3.27122163772583, "logps/chosen": -383.28204345703125, "logps/rejected": -358.60211181640625, "loss": 1.6386, "rewards/accuracies": 0.5, "rewards/chosen": -1.0435818433761597, "rewards/margins": -0.3432643413543701, "rewards/rejected": -0.7003175020217896, "step": 5815 }, { "epoch": 0.67, "learning_rate": 1.0036287018611728e-07, "logits/chosen": -2.905308723449707, "logits/rejected": -3.0740103721618652, "logps/chosen": -236.3300018310547, "logps/rejected": -297.1593017578125, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 0.23686744272708893, "rewards/margins": 3.691052198410034, "rewards/rejected": -3.4541850090026855, "step": 5816 }, { "epoch": 0.67, "learning_rate": 1.0032775371649303e-07, "logits/chosen": -3.3402485847473145, "logits/rejected": -2.9279489517211914, "logps/chosen": -253.5897216796875, "logps/rejected": -228.26712036132812, "loss": 0.2099, "rewards/accuracies": 1.0, "rewards/chosen": 0.32365041971206665, "rewards/margins": 1.967529058456421, "rewards/rejected": -1.643878698348999, "step": 5817 }, { "epoch": 0.67, "learning_rate": 1.0029263724686878e-07, "logits/chosen": -2.558081865310669, "logits/rejected": -2.5557870864868164, "logps/chosen": -350.565185546875, "logps/rejected": -196.56488037109375, "loss": 0.8529, "rewards/accuracies": 0.5, "rewards/chosen": 0.26384812593460083, "rewards/margins": 0.38916119933128357, "rewards/rejected": -0.12531307339668274, "step": 5818 }, { "epoch": 0.67, "learning_rate": 1.0025752077724452e-07, "logits/chosen": -2.8891103267669678, "logits/rejected": -2.659825325012207, "logps/chosen": -187.74624633789062, "logps/rejected": -224.08233642578125, "loss": 0.3796, "rewards/accuracies": 0.875, "rewards/chosen": 0.11772335320711136, "rewards/margins": 1.525943636894226, "rewards/rejected": -1.4082202911376953, "step": 5819 }, { "epoch": 0.67, "learning_rate": 1.0022240430762026e-07, "logits/chosen": -3.249738931655884, "logits/rejected": -3.594846248626709, "logps/chosen": -132.2567138671875, "logps/rejected": -190.0453643798828, "loss": 0.4037, "rewards/accuracies": 0.75, "rewards/chosen": 0.552525520324707, "rewards/margins": 1.4455583095550537, "rewards/rejected": -0.8930327892303467, "step": 5820 }, { "epoch": 0.67, "learning_rate": 1.0018728783799602e-07, "logits/chosen": -2.0396735668182373, "logits/rejected": -2.428861379623413, "logps/chosen": -349.7457275390625, "logps/rejected": -219.21897888183594, "loss": 0.5149, "rewards/accuracies": 0.625, "rewards/chosen": -0.5093855857849121, "rewards/margins": 1.0106496810913086, "rewards/rejected": -1.5200352668762207, "step": 5821 }, { "epoch": 0.67, "learning_rate": 1.0015217136837176e-07, "logits/chosen": -3.482848644256592, "logits/rejected": -3.140950918197632, "logps/chosen": -273.78887939453125, "logps/rejected": -226.2015380859375, "loss": 0.2856, "rewards/accuracies": 1.0, "rewards/chosen": -0.4961393475532532, "rewards/margins": 1.8562145233154297, "rewards/rejected": -2.352353811264038, "step": 5822 }, { "epoch": 0.67, "learning_rate": 1.001170548987475e-07, "logits/chosen": -3.4346351623535156, "logits/rejected": -3.2384328842163086, "logps/chosen": -275.3573913574219, "logps/rejected": -308.7955627441406, "loss": 0.7892, "rewards/accuracies": 0.625, "rewards/chosen": -0.33671483397483826, "rewards/margins": 0.2786064147949219, "rewards/rejected": -0.615321159362793, "step": 5823 }, { "epoch": 0.67, "learning_rate": 1.0008193842912325e-07, "logits/chosen": -3.296865463256836, "logits/rejected": -3.3891334533691406, "logps/chosen": -262.0081787109375, "logps/rejected": -299.94989013671875, "loss": 0.349, "rewards/accuracies": 0.875, "rewards/chosen": -0.2126980721950531, "rewards/margins": 2.41021990776062, "rewards/rejected": -2.622917890548706, "step": 5824 }, { "epoch": 0.67, "learning_rate": 1.0004682195949901e-07, "logits/chosen": -2.6350975036621094, "logits/rejected": -2.404244899749756, "logps/chosen": -293.57904052734375, "logps/rejected": -280.9120178222656, "loss": 0.5418, "rewards/accuracies": 0.5, "rewards/chosen": -0.21584530174732208, "rewards/margins": 1.2097697257995605, "rewards/rejected": -1.425614833831787, "step": 5825 }, { "epoch": 0.67, "learning_rate": 1.0001170548987475e-07, "logits/chosen": -2.778956651687622, "logits/rejected": -2.7061169147491455, "logps/chosen": -351.6490173339844, "logps/rejected": -287.24847412109375, "loss": 0.2405, "rewards/accuracies": 0.875, "rewards/chosen": 0.3793846666812897, "rewards/margins": 2.3204641342163086, "rewards/rejected": -1.9410793781280518, "step": 5826 }, { "epoch": 0.67, "learning_rate": 9.99765890202505e-08, "logits/chosen": -2.9550323486328125, "logits/rejected": -3.0358657836914062, "logps/chosen": -347.96710205078125, "logps/rejected": -211.03768920898438, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3780539333820343, "rewards/margins": 2.7084391117095947, "rewards/rejected": -2.3303849697113037, "step": 5827 }, { "epoch": 0.67, "learning_rate": 9.994147255062623e-08, "logits/chosen": -2.8979969024658203, "logits/rejected": -3.1647567749023438, "logps/chosen": -284.60687255859375, "logps/rejected": -175.18212890625, "loss": 0.3387, "rewards/accuracies": 0.875, "rewards/chosen": 0.06215950846672058, "rewards/margins": 1.5969964265823364, "rewards/rejected": -1.534836769104004, "step": 5828 }, { "epoch": 0.67, "learning_rate": 9.990635608100199e-08, "logits/chosen": -2.881619453430176, "logits/rejected": -2.7755208015441895, "logps/chosen": -359.5775146484375, "logps/rejected": -415.3125305175781, "loss": 1.0902, "rewards/accuracies": 0.375, "rewards/chosen": -0.6240990161895752, "rewards/margins": -0.045827507972717285, "rewards/rejected": -0.5782715082168579, "step": 5829 }, { "epoch": 0.67, "learning_rate": 9.987123961137773e-08, "logits/chosen": -2.772982120513916, "logits/rejected": -2.6127946376800537, "logps/chosen": -256.42718505859375, "logps/rejected": -275.9485168457031, "loss": 0.1645, "rewards/accuracies": 1.0, "rewards/chosen": -0.07040619850158691, "rewards/margins": 1.9554518461227417, "rewards/rejected": -2.025857925415039, "step": 5830 }, { "epoch": 0.67, "learning_rate": 9.983612314175347e-08, "logits/chosen": -3.1122207641601562, "logits/rejected": -3.303518772125244, "logps/chosen": -558.4710693359375, "logps/rejected": -337.11639404296875, "loss": 0.1548, "rewards/accuracies": 1.0, "rewards/chosen": 0.26669684052467346, "rewards/margins": 2.3005690574645996, "rewards/rejected": -2.033872365951538, "step": 5831 }, { "epoch": 0.67, "learning_rate": 9.980100667212922e-08, "logits/chosen": -2.8682050704956055, "logits/rejected": -2.988020896911621, "logps/chosen": -139.43836975097656, "logps/rejected": -275.3034362792969, "loss": 0.2016, "rewards/accuracies": 0.875, "rewards/chosen": 0.24663792550563812, "rewards/margins": 3.2882392406463623, "rewards/rejected": -3.0416011810302734, "step": 5832 }, { "epoch": 0.67, "learning_rate": 9.976589020250498e-08, "logits/chosen": -2.9198756217956543, "logits/rejected": -2.9667680263519287, "logps/chosen": -336.41717529296875, "logps/rejected": -218.00355529785156, "loss": 0.1731, "rewards/accuracies": 1.0, "rewards/chosen": 0.21801556646823883, "rewards/margins": 2.861636161804199, "rewards/rejected": -2.643620729446411, "step": 5833 }, { "epoch": 0.67, "learning_rate": 9.973077373288072e-08, "logits/chosen": -2.7886998653411865, "logits/rejected": -2.79207181930542, "logps/chosen": -182.76968383789062, "logps/rejected": -388.615966796875, "loss": 0.6572, "rewards/accuracies": 0.75, "rewards/chosen": -0.33223211765289307, "rewards/margins": 1.6258575916290283, "rewards/rejected": -1.9580897092819214, "step": 5834 }, { "epoch": 0.67, "learning_rate": 9.969565726325646e-08, "logits/chosen": -3.1411495208740234, "logits/rejected": -3.2230286598205566, "logps/chosen": -289.51177978515625, "logps/rejected": -275.09686279296875, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": 0.13136039674282074, "rewards/margins": 2.24552059173584, "rewards/rejected": -2.1141600608825684, "step": 5835 }, { "epoch": 0.67, "learning_rate": 9.96605407936322e-08, "logits/chosen": -3.1126928329467773, "logits/rejected": -3.1807703971862793, "logps/chosen": -135.57492065429688, "logps/rejected": -220.7835235595703, "loss": 0.2185, "rewards/accuracies": 1.0, "rewards/chosen": 0.007068857550621033, "rewards/margins": 2.2056119441986084, "rewards/rejected": -2.198542833328247, "step": 5836 }, { "epoch": 0.67, "learning_rate": 9.962542432400797e-08, "logits/chosen": -3.4664762020111084, "logits/rejected": -3.3600733280181885, "logps/chosen": -318.40386962890625, "logps/rejected": -272.092041015625, "loss": 0.1527, "rewards/accuracies": 1.0, "rewards/chosen": 0.3521682024002075, "rewards/margins": 3.011784553527832, "rewards/rejected": -2.659616470336914, "step": 5837 }, { "epoch": 0.67, "learning_rate": 9.959030785438371e-08, "logits/chosen": -3.5358829498291016, "logits/rejected": -3.1728873252868652, "logps/chosen": -244.89422607421875, "logps/rejected": -220.75164794921875, "loss": 0.4375, "rewards/accuracies": 0.875, "rewards/chosen": 0.07542509585618973, "rewards/margins": 1.5355592966079712, "rewards/rejected": -1.4601341485977173, "step": 5838 }, { "epoch": 0.67, "learning_rate": 9.955519138475945e-08, "logits/chosen": -3.33901309967041, "logits/rejected": -3.286229133605957, "logps/chosen": -165.34078979492188, "logps/rejected": -240.930419921875, "loss": 0.3763, "rewards/accuracies": 0.75, "rewards/chosen": -0.5709723830223083, "rewards/margins": 2.2782716751098633, "rewards/rejected": -2.8492441177368164, "step": 5839 }, { "epoch": 0.67, "learning_rate": 9.952007491513519e-08, "logits/chosen": -3.3268375396728516, "logits/rejected": -3.4367496967315674, "logps/chosen": -312.5725402832031, "logps/rejected": -274.41900634765625, "loss": 0.7637, "rewards/accuracies": 0.625, "rewards/chosen": -0.45186296105384827, "rewards/margins": 0.8509145379066467, "rewards/rejected": -1.3027775287628174, "step": 5840 }, { "epoch": 0.67, "learning_rate": 9.948495844551094e-08, "logits/chosen": -2.9128224849700928, "logits/rejected": -2.834986686706543, "logps/chosen": -131.3606719970703, "logps/rejected": -135.44131469726562, "loss": 0.6533, "rewards/accuracies": 0.625, "rewards/chosen": -0.6300539374351501, "rewards/margins": 0.58622807264328, "rewards/rejected": -1.2162820100784302, "step": 5841 }, { "epoch": 0.67, "learning_rate": 9.94498419758867e-08, "logits/chosen": -3.0866498947143555, "logits/rejected": -3.1177048683166504, "logps/chosen": -266.5037841796875, "logps/rejected": -334.56903076171875, "loss": 0.2839, "rewards/accuracies": 0.875, "rewards/chosen": -0.21335217356681824, "rewards/margins": 2.0195071697235107, "rewards/rejected": -2.2328596115112305, "step": 5842 }, { "epoch": 0.67, "learning_rate": 9.941472550626244e-08, "logits/chosen": -3.249605178833008, "logits/rejected": -3.2229976654052734, "logps/chosen": -241.50848388671875, "logps/rejected": -155.09881591796875, "loss": 0.3776, "rewards/accuracies": 0.75, "rewards/chosen": -0.33636894822120667, "rewards/margins": 1.6979063749313354, "rewards/rejected": -2.0342752933502197, "step": 5843 }, { "epoch": 0.67, "learning_rate": 9.937960903663818e-08, "logits/chosen": -3.678542375564575, "logits/rejected": -3.6734166145324707, "logps/chosen": -142.36630249023438, "logps/rejected": -144.0872802734375, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": 0.08605018258094788, "rewards/margins": 2.555245876312256, "rewards/rejected": -2.46919584274292, "step": 5844 }, { "epoch": 0.67, "learning_rate": 9.934449256701393e-08, "logits/chosen": -3.554851531982422, "logits/rejected": -3.091127634048462, "logps/chosen": -310.6380310058594, "logps/rejected": -228.78302001953125, "loss": 0.1554, "rewards/accuracies": 1.0, "rewards/chosen": 0.4400203227996826, "rewards/margins": 2.2957780361175537, "rewards/rejected": -1.855757713317871, "step": 5845 }, { "epoch": 0.67, "learning_rate": 9.930937609738967e-08, "logits/chosen": -3.0980947017669678, "logits/rejected": -3.210541009902954, "logps/chosen": -234.4436492919922, "logps/rejected": -188.8125, "loss": 0.4845, "rewards/accuracies": 0.75, "rewards/chosen": -0.31106436252593994, "rewards/margins": 1.0344637632369995, "rewards/rejected": -1.34552800655365, "step": 5846 }, { "epoch": 0.67, "learning_rate": 9.927425962776541e-08, "logits/chosen": -2.6038763523101807, "logits/rejected": -2.6116533279418945, "logps/chosen": -255.38092041015625, "logps/rejected": -221.74240112304688, "loss": 0.5846, "rewards/accuracies": 0.625, "rewards/chosen": 0.03336520493030548, "rewards/margins": 0.9973223805427551, "rewards/rejected": -0.9639571905136108, "step": 5847 }, { "epoch": 0.67, "learning_rate": 9.923914315814115e-08, "logits/chosen": -2.514009952545166, "logits/rejected": -2.7022643089294434, "logps/chosen": -274.4974365234375, "logps/rejected": -263.0378723144531, "loss": 0.3977, "rewards/accuracies": 0.75, "rewards/chosen": -0.025754515081644058, "rewards/margins": 1.8148938417434692, "rewards/rejected": -1.8406481742858887, "step": 5848 }, { "epoch": 0.67, "learning_rate": 9.920402668851692e-08, "logits/chosen": -3.582098960876465, "logits/rejected": -3.658900260925293, "logps/chosen": -267.30126953125, "logps/rejected": -174.888671875, "loss": 0.5852, "rewards/accuracies": 0.75, "rewards/chosen": -0.5774483680725098, "rewards/margins": 0.8964422941207886, "rewards/rejected": -1.4738906621932983, "step": 5849 }, { "epoch": 0.67, "learning_rate": 9.916891021889266e-08, "logits/chosen": -2.7487282752990723, "logits/rejected": -2.9112915992736816, "logps/chosen": -320.3329772949219, "logps/rejected": -348.9295349121094, "loss": 0.7899, "rewards/accuracies": 0.625, "rewards/chosen": -0.4242067039012909, "rewards/margins": 1.186872959136963, "rewards/rejected": -1.6110796928405762, "step": 5850 }, { "epoch": 0.67, "learning_rate": 9.91337937492684e-08, "logits/chosen": -2.5861692428588867, "logits/rejected": -2.83107328414917, "logps/chosen": -208.30081176757812, "logps/rejected": -234.38783264160156, "loss": 0.5015, "rewards/accuracies": 0.75, "rewards/chosen": 0.027596376836299896, "rewards/margins": 1.1610729694366455, "rewards/rejected": -1.1334764957427979, "step": 5851 }, { "epoch": 0.67, "learning_rate": 9.909867727964414e-08, "logits/chosen": -2.796337604522705, "logits/rejected": -2.8866446018218994, "logps/chosen": -405.505126953125, "logps/rejected": -327.7308349609375, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": 0.6273916363716125, "rewards/margins": 2.630035400390625, "rewards/rejected": -2.0026438236236572, "step": 5852 }, { "epoch": 0.67, "learning_rate": 9.906356081001991e-08, "logits/chosen": -2.748167037963867, "logits/rejected": -2.9140172004699707, "logps/chosen": -173.40005493164062, "logps/rejected": -191.3587188720703, "loss": 0.4278, "rewards/accuracies": 0.625, "rewards/chosen": -0.17432591319084167, "rewards/margins": 1.5175037384033203, "rewards/rejected": -1.6918295621871948, "step": 5853 }, { "epoch": 0.67, "learning_rate": 9.902844434039565e-08, "logits/chosen": -3.1110167503356934, "logits/rejected": -3.220393657684326, "logps/chosen": -254.31382751464844, "logps/rejected": -276.19476318359375, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": 0.00897166132926941, "rewards/margins": 3.100571632385254, "rewards/rejected": -3.091599702835083, "step": 5854 }, { "epoch": 0.67, "learning_rate": 9.899332787077139e-08, "logits/chosen": -3.512065887451172, "logits/rejected": -3.6136631965637207, "logps/chosen": -142.45318603515625, "logps/rejected": -98.55839538574219, "loss": 0.5356, "rewards/accuracies": 0.75, "rewards/chosen": -0.3754810094833374, "rewards/margins": 0.5947369337081909, "rewards/rejected": -0.9702179431915283, "step": 5855 }, { "epoch": 0.68, "learning_rate": 9.895821140114713e-08, "logits/chosen": -3.166360378265381, "logits/rejected": -3.441213369369507, "logps/chosen": -207.2499542236328, "logps/rejected": -232.8955841064453, "loss": 0.3732, "rewards/accuracies": 1.0, "rewards/chosen": -0.435608834028244, "rewards/margins": 2.831185817718506, "rewards/rejected": -3.2667946815490723, "step": 5856 }, { "epoch": 0.68, "learning_rate": 9.892309493152288e-08, "logits/chosen": -3.580611228942871, "logits/rejected": -3.495077610015869, "logps/chosen": -190.08535766601562, "logps/rejected": -230.56906127929688, "loss": 0.4348, "rewards/accuracies": 0.75, "rewards/chosen": -0.06988823413848877, "rewards/margins": 1.7075531482696533, "rewards/rejected": -1.777441382408142, "step": 5857 }, { "epoch": 0.68, "learning_rate": 9.888797846189862e-08, "logits/chosen": -2.831543207168579, "logits/rejected": -3.042236804962158, "logps/chosen": -231.40057373046875, "logps/rejected": -228.1633758544922, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": -0.12768608331680298, "rewards/margins": 1.7721436023712158, "rewards/rejected": -1.899829626083374, "step": 5858 }, { "epoch": 0.68, "learning_rate": 9.885286199227438e-08, "logits/chosen": -2.8729584217071533, "logits/rejected": -2.7097322940826416, "logps/chosen": -310.5925598144531, "logps/rejected": -218.64349365234375, "loss": 1.1161, "rewards/accuracies": 0.625, "rewards/chosen": -0.7986485362052917, "rewards/margins": 1.927675724029541, "rewards/rejected": -2.7263240814208984, "step": 5859 }, { "epoch": 0.68, "learning_rate": 9.881774552265012e-08, "logits/chosen": -3.352140426635742, "logits/rejected": -3.01410174369812, "logps/chosen": -231.48455810546875, "logps/rejected": -175.70481872558594, "loss": 0.3894, "rewards/accuracies": 0.875, "rewards/chosen": -0.28056812286376953, "rewards/margins": 0.9903408885002136, "rewards/rejected": -1.2709089517593384, "step": 5860 }, { "epoch": 0.68, "learning_rate": 9.878262905302586e-08, "logits/chosen": -3.160825490951538, "logits/rejected": -3.3330297470092773, "logps/chosen": -164.91323852539062, "logps/rejected": -251.02366638183594, "loss": 0.3042, "rewards/accuracies": 0.875, "rewards/chosen": 0.27820488810539246, "rewards/margins": 1.5246586799621582, "rewards/rejected": -1.2464537620544434, "step": 5861 }, { "epoch": 0.68, "learning_rate": 9.874751258340161e-08, "logits/chosen": -3.110218048095703, "logits/rejected": -2.913384437561035, "logps/chosen": -247.629150390625, "logps/rejected": -216.33143615722656, "loss": 0.4153, "rewards/accuracies": 0.875, "rewards/chosen": -0.3600773513317108, "rewards/margins": 1.2763354778289795, "rewards/rejected": -1.6364127397537231, "step": 5862 }, { "epoch": 0.68, "learning_rate": 9.871239611377735e-08, "logits/chosen": -3.811727523803711, "logits/rejected": -3.5451178550720215, "logps/chosen": -240.30506896972656, "logps/rejected": -277.1090087890625, "loss": 0.3306, "rewards/accuracies": 0.75, "rewards/chosen": -0.07399120926856995, "rewards/margins": 2.857959032058716, "rewards/rejected": -2.931950092315674, "step": 5863 }, { "epoch": 0.68, "learning_rate": 9.86772796441531e-08, "logits/chosen": -2.823202610015869, "logits/rejected": -2.8664822578430176, "logps/chosen": -332.12860107421875, "logps/rejected": -318.6343994140625, "loss": 0.3264, "rewards/accuracies": 0.875, "rewards/chosen": 0.35973140597343445, "rewards/margins": 1.6432604789733887, "rewards/rejected": -1.2835290431976318, "step": 5864 }, { "epoch": 0.68, "learning_rate": 9.864216317452884e-08, "logits/chosen": -3.41549015045166, "logits/rejected": -3.5235023498535156, "logps/chosen": -178.75234985351562, "logps/rejected": -202.8977813720703, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": 0.41884109377861023, "rewards/margins": 1.746932029724121, "rewards/rejected": -1.328090786933899, "step": 5865 }, { "epoch": 0.68, "learning_rate": 9.86070467049046e-08, "logits/chosen": -2.6146955490112305, "logits/rejected": -2.742666721343994, "logps/chosen": -146.34173583984375, "logps/rejected": -267.6336669921875, "loss": 0.1726, "rewards/accuracies": 1.0, "rewards/chosen": -0.12419556826353073, "rewards/margins": 2.6517715454101562, "rewards/rejected": -2.7759671211242676, "step": 5866 }, { "epoch": 0.68, "learning_rate": 9.857193023528034e-08, "logits/chosen": -3.233966112136841, "logits/rejected": -3.4024786949157715, "logps/chosen": -274.607421875, "logps/rejected": -193.4553680419922, "loss": 0.5457, "rewards/accuracies": 0.75, "rewards/chosen": -0.260707825422287, "rewards/margins": 0.7444044947624207, "rewards/rejected": -1.0051122903823853, "step": 5867 }, { "epoch": 0.68, "learning_rate": 9.853681376565608e-08, "logits/chosen": -3.29962158203125, "logits/rejected": -3.138601541519165, "logps/chosen": -252.79920959472656, "logps/rejected": -332.0163269042969, "loss": 0.3241, "rewards/accuracies": 0.75, "rewards/chosen": -0.3704460561275482, "rewards/margins": 1.6241704225540161, "rewards/rejected": -1.9946165084838867, "step": 5868 }, { "epoch": 0.68, "learning_rate": 9.850169729603182e-08, "logits/chosen": -3.7388415336608887, "logits/rejected": -3.3806042671203613, "logps/chosen": -282.8140869140625, "logps/rejected": -235.76132202148438, "loss": 0.2733, "rewards/accuracies": 0.875, "rewards/chosen": 0.1902931183576584, "rewards/margins": 2.6075892448425293, "rewards/rejected": -2.4172964096069336, "step": 5869 }, { "epoch": 0.68, "learning_rate": 9.846658082640759e-08, "logits/chosen": -3.194521903991699, "logits/rejected": -2.8490991592407227, "logps/chosen": -261.7351989746094, "logps/rejected": -92.43534851074219, "loss": 0.6156, "rewards/accuracies": 0.875, "rewards/chosen": -0.6942052841186523, "rewards/margins": 0.3979751169681549, "rewards/rejected": -1.0921803712844849, "step": 5870 }, { "epoch": 0.68, "learning_rate": 9.843146435678333e-08, "logits/chosen": -2.9638099670410156, "logits/rejected": -3.0124082565307617, "logps/chosen": -246.3319091796875, "logps/rejected": -188.26318359375, "loss": 0.2102, "rewards/accuracies": 0.875, "rewards/chosen": -0.0751928985118866, "rewards/margins": 2.242140531539917, "rewards/rejected": -2.317333459854126, "step": 5871 }, { "epoch": 0.68, "learning_rate": 9.839634788715907e-08, "logits/chosen": -2.9471092224121094, "logits/rejected": -2.945946455001831, "logps/chosen": -389.36529541015625, "logps/rejected": -247.71109008789062, "loss": 0.6811, "rewards/accuracies": 0.625, "rewards/chosen": -0.3161923587322235, "rewards/margins": 1.4977154731750488, "rewards/rejected": -1.8139076232910156, "step": 5872 }, { "epoch": 0.68, "learning_rate": 9.836123141753481e-08, "logits/chosen": -3.081059455871582, "logits/rejected": -3.056532382965088, "logps/chosen": -264.02947998046875, "logps/rejected": -176.89459228515625, "loss": 0.5698, "rewards/accuracies": 0.75, "rewards/chosen": -0.17406967282295227, "rewards/margins": 1.549403190612793, "rewards/rejected": -1.7234728336334229, "step": 5873 }, { "epoch": 0.68, "learning_rate": 9.832611494791057e-08, "logits/chosen": -2.8679544925689697, "logits/rejected": -2.7538418769836426, "logps/chosen": -216.33370971679688, "logps/rejected": -189.82852172851562, "loss": 0.5979, "rewards/accuracies": 0.625, "rewards/chosen": -0.6232277750968933, "rewards/margins": 1.1574459075927734, "rewards/rejected": -1.7806737422943115, "step": 5874 }, { "epoch": 0.68, "learning_rate": 9.829099847828631e-08, "logits/chosen": -2.838540554046631, "logits/rejected": -2.8779385089874268, "logps/chosen": -164.7882080078125, "logps/rejected": -194.6588134765625, "loss": 0.2844, "rewards/accuracies": 0.875, "rewards/chosen": -0.013573884963989258, "rewards/margins": 1.6807608604431152, "rewards/rejected": -1.694334626197815, "step": 5875 }, { "epoch": 0.68, "learning_rate": 9.825588200866206e-08, "logits/chosen": -3.4519646167755127, "logits/rejected": -3.3657784461975098, "logps/chosen": -296.53765869140625, "logps/rejected": -245.7030487060547, "loss": 0.5944, "rewards/accuracies": 0.75, "rewards/chosen": -0.043736085295677185, "rewards/margins": 1.9400216341018677, "rewards/rejected": -1.9837578535079956, "step": 5876 }, { "epoch": 0.68, "learning_rate": 9.82207655390378e-08, "logits/chosen": -3.288424491882324, "logits/rejected": -3.8043079376220703, "logps/chosen": -58.185569763183594, "logps/rejected": -236.12933349609375, "loss": 0.1753, "rewards/accuracies": 0.875, "rewards/chosen": 0.18835115432739258, "rewards/margins": 2.941967487335205, "rewards/rejected": -2.7536165714263916, "step": 5877 }, { "epoch": 0.68, "learning_rate": 9.818564906941356e-08, "logits/chosen": -2.6230850219726562, "logits/rejected": -2.8278255462646484, "logps/chosen": -364.2283020019531, "logps/rejected": -321.23175048828125, "loss": 0.5218, "rewards/accuracies": 0.875, "rewards/chosen": -0.5488657355308533, "rewards/margins": 1.4745985269546509, "rewards/rejected": -2.0234644412994385, "step": 5878 }, { "epoch": 0.68, "learning_rate": 9.81505325997893e-08, "logits/chosen": -3.81003737449646, "logits/rejected": -3.7499918937683105, "logps/chosen": -259.1098937988281, "logps/rejected": -265.09661865234375, "loss": 0.2907, "rewards/accuracies": 0.875, "rewards/chosen": 0.02219712734222412, "rewards/margins": 1.85244882106781, "rewards/rejected": -1.8302515745162964, "step": 5879 }, { "epoch": 0.68, "learning_rate": 9.811541613016504e-08, "logits/chosen": -3.1402573585510254, "logits/rejected": -3.1011619567871094, "logps/chosen": -165.13050842285156, "logps/rejected": -231.5508270263672, "loss": 0.5163, "rewards/accuracies": 0.75, "rewards/chosen": 0.003266632556915283, "rewards/margins": 1.9961786270141602, "rewards/rejected": -1.9929120540618896, "step": 5880 }, { "epoch": 0.68, "learning_rate": 9.808029966054078e-08, "logits/chosen": -3.083706855773926, "logits/rejected": -3.110898494720459, "logps/chosen": -310.09210205078125, "logps/rejected": -293.1856689453125, "loss": 0.553, "rewards/accuracies": 0.75, "rewards/chosen": -0.8207219839096069, "rewards/margins": 1.0629709959030151, "rewards/rejected": -1.883692979812622, "step": 5881 }, { "epoch": 0.68, "learning_rate": 9.804518319091655e-08, "logits/chosen": -3.1197102069854736, "logits/rejected": -3.37087345123291, "logps/chosen": -334.97894287109375, "logps/rejected": -343.4999694824219, "loss": 0.4576, "rewards/accuracies": 0.625, "rewards/chosen": 0.5193545818328857, "rewards/margins": 2.6248574256896973, "rewards/rejected": -2.1055030822753906, "step": 5882 }, { "epoch": 0.68, "learning_rate": 9.801006672129229e-08, "logits/chosen": -2.979750156402588, "logits/rejected": -3.1595311164855957, "logps/chosen": -272.1949768066406, "logps/rejected": -189.91856384277344, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": 0.16188693046569824, "rewards/margins": 2.8442046642303467, "rewards/rejected": -2.6823177337646484, "step": 5883 }, { "epoch": 0.68, "learning_rate": 9.797495025166803e-08, "logits/chosen": -3.3060319423675537, "logits/rejected": -3.2739510536193848, "logps/chosen": -199.9860076904297, "logps/rejected": -338.5311584472656, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 0.10825134068727493, "rewards/margins": 3.75469970703125, "rewards/rejected": -3.6464486122131348, "step": 5884 }, { "epoch": 0.68, "learning_rate": 9.793983378204377e-08, "logits/chosen": -2.81729793548584, "logits/rejected": -2.5747714042663574, "logps/chosen": -218.89434814453125, "logps/rejected": -279.6658935546875, "loss": 0.9164, "rewards/accuracies": 0.25, "rewards/chosen": -0.4768143892288208, "rewards/margins": -0.10084934532642365, "rewards/rejected": -0.37596502900123596, "step": 5885 }, { "epoch": 0.68, "learning_rate": 9.790471731241952e-08, "logits/chosen": -4.174584865570068, "logits/rejected": -3.3945436477661133, "logps/chosen": -282.98876953125, "logps/rejected": -174.14517211914062, "loss": 0.3848, "rewards/accuracies": 0.875, "rewards/chosen": -0.09973639249801636, "rewards/margins": 1.7683066129684448, "rewards/rejected": -1.8680429458618164, "step": 5886 }, { "epoch": 0.68, "learning_rate": 9.786960084279527e-08, "logits/chosen": -3.1110966205596924, "logits/rejected": -3.1979238986968994, "logps/chosen": -328.7284851074219, "logps/rejected": -319.1300048828125, "loss": 0.3117, "rewards/accuracies": 0.875, "rewards/chosen": -0.10484620928764343, "rewards/margins": 2.649855136871338, "rewards/rejected": -2.7547013759613037, "step": 5887 }, { "epoch": 0.68, "learning_rate": 9.783448437317102e-08, "logits/chosen": -2.5969231128692627, "logits/rejected": -2.585523843765259, "logps/chosen": -252.16845703125, "logps/rejected": -286.13079833984375, "loss": 0.8252, "rewards/accuracies": 0.5, "rewards/chosen": -0.6280567646026611, "rewards/margins": 0.405137836933136, "rewards/rejected": -1.0331945419311523, "step": 5888 }, { "epoch": 0.68, "learning_rate": 9.779936790354676e-08, "logits/chosen": -3.793468952178955, "logits/rejected": -3.461768627166748, "logps/chosen": -265.3413391113281, "logps/rejected": -174.39877319335938, "loss": 0.4242, "rewards/accuracies": 0.875, "rewards/chosen": -0.4071347415447235, "rewards/margins": 1.0258357524871826, "rewards/rejected": -1.432970404624939, "step": 5889 }, { "epoch": 0.68, "learning_rate": 9.776425143392251e-08, "logits/chosen": -2.9926743507385254, "logits/rejected": -3.1489932537078857, "logps/chosen": -352.81988525390625, "logps/rejected": -244.9191436767578, "loss": 0.3261, "rewards/accuracies": 0.875, "rewards/chosen": -0.397050142288208, "rewards/margins": 1.7152795791625977, "rewards/rejected": -2.1123297214508057, "step": 5890 }, { "epoch": 0.68, "learning_rate": 9.772913496429825e-08, "logits/chosen": -2.9910154342651367, "logits/rejected": -2.745288133621216, "logps/chosen": -396.885986328125, "logps/rejected": -287.4437561035156, "loss": 0.7386, "rewards/accuracies": 0.375, "rewards/chosen": -0.9280754327774048, "rewards/margins": 0.4229375123977661, "rewards/rejected": -1.351012945175171, "step": 5891 }, { "epoch": 0.68, "learning_rate": 9.769401849467399e-08, "logits/chosen": -3.3607473373413086, "logits/rejected": -3.5347373485565186, "logps/chosen": -193.89651489257812, "logps/rejected": -204.18453979492188, "loss": 0.5605, "rewards/accuracies": 0.875, "rewards/chosen": -0.05268669128417969, "rewards/margins": 1.80087411403656, "rewards/rejected": -1.8535608053207397, "step": 5892 }, { "epoch": 0.68, "learning_rate": 9.765890202504975e-08, "logits/chosen": -2.92319655418396, "logits/rejected": -3.138502359390259, "logps/chosen": -191.7445831298828, "logps/rejected": -191.63958740234375, "loss": 0.3898, "rewards/accuracies": 0.875, "rewards/chosen": 0.09467813372612, "rewards/margins": 1.2776422500610352, "rewards/rejected": -1.1829640865325928, "step": 5893 }, { "epoch": 0.68, "learning_rate": 9.76237855554255e-08, "logits/chosen": -2.9740335941314697, "logits/rejected": -2.9942469596862793, "logps/chosen": -205.74679565429688, "logps/rejected": -326.3966369628906, "loss": 0.5167, "rewards/accuracies": 0.75, "rewards/chosen": -0.3622058033943176, "rewards/margins": 1.6815252304077148, "rewards/rejected": -2.0437309741973877, "step": 5894 }, { "epoch": 0.68, "learning_rate": 9.758866908580124e-08, "logits/chosen": -2.7646241188049316, "logits/rejected": -2.7888848781585693, "logps/chosen": -281.84906005859375, "logps/rejected": -407.03778076171875, "loss": 0.5642, "rewards/accuracies": 0.625, "rewards/chosen": -0.45783287286758423, "rewards/margins": 0.778406023979187, "rewards/rejected": -1.236238956451416, "step": 5895 }, { "epoch": 0.68, "learning_rate": 9.755355261617698e-08, "logits/chosen": -3.1143622398376465, "logits/rejected": -2.984952449798584, "logps/chosen": -193.03182983398438, "logps/rejected": -262.170166015625, "loss": 0.2048, "rewards/accuracies": 1.0, "rewards/chosen": -0.013126235455274582, "rewards/margins": 3.5639405250549316, "rewards/rejected": -3.5770668983459473, "step": 5896 }, { "epoch": 0.68, "learning_rate": 9.751843614655272e-08, "logits/chosen": -3.590216875076294, "logits/rejected": -3.7463057041168213, "logps/chosen": -311.6917724609375, "logps/rejected": -259.8072509765625, "loss": 0.4042, "rewards/accuracies": 0.75, "rewards/chosen": -0.34893396496772766, "rewards/margins": 2.4251718521118164, "rewards/rejected": -2.77410626411438, "step": 5897 }, { "epoch": 0.68, "learning_rate": 9.748331967692849e-08, "logits/chosen": -3.2604527473449707, "logits/rejected": -3.5903191566467285, "logps/chosen": -225.63352966308594, "logps/rejected": -242.15225219726562, "loss": 0.1739, "rewards/accuracies": 1.0, "rewards/chosen": -0.10515525192022324, "rewards/margins": 2.736217737197876, "rewards/rejected": -2.8413729667663574, "step": 5898 }, { "epoch": 0.68, "learning_rate": 9.744820320730423e-08, "logits/chosen": -2.968627691268921, "logits/rejected": -2.811392307281494, "logps/chosen": -239.76162719726562, "logps/rejected": -243.31930541992188, "loss": 0.2341, "rewards/accuracies": 1.0, "rewards/chosen": 0.10404235124588013, "rewards/margins": 1.9953150749206543, "rewards/rejected": -1.8912725448608398, "step": 5899 }, { "epoch": 0.68, "learning_rate": 9.741308673767997e-08, "logits/chosen": -3.719634532928467, "logits/rejected": -3.480433464050293, "logps/chosen": -260.4949035644531, "logps/rejected": -154.40673828125, "loss": 0.4187, "rewards/accuracies": 0.875, "rewards/chosen": 0.094922736287117, "rewards/margins": 1.4717769622802734, "rewards/rejected": -1.3768543004989624, "step": 5900 }, { "epoch": 0.68, "learning_rate": 9.737797026805571e-08, "logits/chosen": -3.0189483165740967, "logits/rejected": -2.7639288902282715, "logps/chosen": -237.5421600341797, "logps/rejected": -316.7267150878906, "loss": 0.3731, "rewards/accuracies": 0.875, "rewards/chosen": -0.15398219227790833, "rewards/margins": 1.8559207916259766, "rewards/rejected": -2.0099029541015625, "step": 5901 }, { "epoch": 0.68, "learning_rate": 9.734285379843146e-08, "logits/chosen": -2.857999801635742, "logits/rejected": -2.9296715259552, "logps/chosen": -382.718994140625, "logps/rejected": -329.0953369140625, "loss": 0.3721, "rewards/accuracies": 0.75, "rewards/chosen": 0.6838896870613098, "rewards/margins": 2.614898204803467, "rewards/rejected": -1.9310085773468018, "step": 5902 }, { "epoch": 0.68, "learning_rate": 9.73077373288072e-08, "logits/chosen": -2.695199966430664, "logits/rejected": -2.6645305156707764, "logps/chosen": -514.1402587890625, "logps/rejected": -298.4873046875, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": -0.36836111545562744, "rewards/margins": 0.9491144418716431, "rewards/rejected": -1.3174755573272705, "step": 5903 }, { "epoch": 0.68, "learning_rate": 9.727262085918296e-08, "logits/chosen": -3.167637825012207, "logits/rejected": -2.9751222133636475, "logps/chosen": -236.99647521972656, "logps/rejected": -172.13987731933594, "loss": 0.3138, "rewards/accuracies": 0.875, "rewards/chosen": 0.1070348471403122, "rewards/margins": 1.6412081718444824, "rewards/rejected": -1.5341730117797852, "step": 5904 }, { "epoch": 0.68, "learning_rate": 9.72375043895587e-08, "logits/chosen": -2.3025355339050293, "logits/rejected": -2.2213094234466553, "logps/chosen": -254.81289672851562, "logps/rejected": -166.6031036376953, "loss": 0.7376, "rewards/accuracies": 0.75, "rewards/chosen": -0.6056960225105286, "rewards/margins": 0.6519997119903564, "rewards/rejected": -1.2576956748962402, "step": 5905 }, { "epoch": 0.68, "learning_rate": 9.720238791993444e-08, "logits/chosen": -3.153719902038574, "logits/rejected": -3.16300368309021, "logps/chosen": -246.39857482910156, "logps/rejected": -277.1581726074219, "loss": 0.7555, "rewards/accuracies": 0.5, "rewards/chosen": -0.5568031072616577, "rewards/margins": 0.0857199877500534, "rewards/rejected": -0.6425230503082275, "step": 5906 }, { "epoch": 0.68, "learning_rate": 9.716727145031019e-08, "logits/chosen": -3.4165456295013428, "logits/rejected": -3.545555353164673, "logps/chosen": -106.32081604003906, "logps/rejected": -189.77041625976562, "loss": 0.3703, "rewards/accuracies": 0.875, "rewards/chosen": -0.07883264869451523, "rewards/margins": 1.639039397239685, "rewards/rejected": -1.717872142791748, "step": 5907 }, { "epoch": 0.68, "learning_rate": 9.713215498068593e-08, "logits/chosen": -3.312527656555176, "logits/rejected": -3.2539167404174805, "logps/chosen": -244.40625, "logps/rejected": -202.13272094726562, "loss": 0.3236, "rewards/accuracies": 0.875, "rewards/chosen": 0.0968976616859436, "rewards/margins": 1.4981966018676758, "rewards/rejected": -1.401298999786377, "step": 5908 }, { "epoch": 0.68, "learning_rate": 9.709703851106167e-08, "logits/chosen": -2.7766778469085693, "logits/rejected": -2.850224494934082, "logps/chosen": -241.97938537597656, "logps/rejected": -367.9206848144531, "loss": 0.2865, "rewards/accuracies": 0.875, "rewards/chosen": -0.004072621464729309, "rewards/margins": 2.3508472442626953, "rewards/rejected": -2.354919672012329, "step": 5909 }, { "epoch": 0.68, "learning_rate": 9.706192204143743e-08, "logits/chosen": -3.6115927696228027, "logits/rejected": -3.5794646739959717, "logps/chosen": -254.45274353027344, "logps/rejected": -235.61143493652344, "loss": 0.2871, "rewards/accuracies": 0.875, "rewards/chosen": 0.029958456754684448, "rewards/margins": 1.6145838499069214, "rewards/rejected": -1.584625482559204, "step": 5910 }, { "epoch": 0.68, "learning_rate": 9.702680557181318e-08, "logits/chosen": -3.144989013671875, "logits/rejected": -3.3395748138427734, "logps/chosen": -350.95794677734375, "logps/rejected": -336.82403564453125, "loss": 0.342, "rewards/accuracies": 0.875, "rewards/chosen": 0.2829788625240326, "rewards/margins": 1.2018797397613525, "rewards/rejected": -0.9189009070396423, "step": 5911 }, { "epoch": 0.68, "learning_rate": 9.699168910218892e-08, "logits/chosen": -3.132169246673584, "logits/rejected": -2.8998970985412598, "logps/chosen": -265.65838623046875, "logps/rejected": -232.97451782226562, "loss": 0.61, "rewards/accuracies": 0.625, "rewards/chosen": -0.9042503237724304, "rewards/margins": 1.3849542140960693, "rewards/rejected": -2.2892045974731445, "step": 5912 }, { "epoch": 0.68, "learning_rate": 9.695657263256466e-08, "logits/chosen": -3.017947196960449, "logits/rejected": -3.2881298065185547, "logps/chosen": -297.63946533203125, "logps/rejected": -264.85906982421875, "loss": 0.4331, "rewards/accuracies": 0.875, "rewards/chosen": 0.10356324911117554, "rewards/margins": 1.4964240789413452, "rewards/rejected": -1.3928608894348145, "step": 5913 }, { "epoch": 0.68, "learning_rate": 9.69214561629404e-08, "logits/chosen": -2.6173129081726074, "logits/rejected": -2.616830587387085, "logps/chosen": -345.4765930175781, "logps/rejected": -373.86322021484375, "loss": 0.2847, "rewards/accuracies": 1.0, "rewards/chosen": 0.18101102113723755, "rewards/margins": 2.104949951171875, "rewards/rejected": -1.9239389896392822, "step": 5914 }, { "epoch": 0.68, "learning_rate": 9.688633969331617e-08, "logits/chosen": -3.4853265285491943, "logits/rejected": -3.9362049102783203, "logps/chosen": -316.37457275390625, "logps/rejected": -245.63131713867188, "loss": 0.8502, "rewards/accuracies": 0.75, "rewards/chosen": -0.6739528775215149, "rewards/margins": 1.7395710945129395, "rewards/rejected": -2.4135239124298096, "step": 5915 }, { "epoch": 0.68, "learning_rate": 9.685122322369191e-08, "logits/chosen": -3.5124106407165527, "logits/rejected": -4.004014492034912, "logps/chosen": -287.0138854980469, "logps/rejected": -290.85931396484375, "loss": 0.273, "rewards/accuracies": 0.75, "rewards/chosen": -0.505718469619751, "rewards/margins": 3.757826566696167, "rewards/rejected": -4.263545036315918, "step": 5916 }, { "epoch": 0.68, "learning_rate": 9.681610675406765e-08, "logits/chosen": -2.930224895477295, "logits/rejected": -2.7579970359802246, "logps/chosen": -176.7631072998047, "logps/rejected": -294.4903564453125, "loss": 0.246, "rewards/accuracies": 1.0, "rewards/chosen": -0.5332675576210022, "rewards/margins": 1.7678860425949097, "rewards/rejected": -2.3011536598205566, "step": 5917 }, { "epoch": 0.68, "learning_rate": 9.678099028444339e-08, "logits/chosen": -3.0618443489074707, "logits/rejected": -2.790306329727173, "logps/chosen": -350.218994140625, "logps/rejected": -458.3529052734375, "loss": 0.2139, "rewards/accuracies": 0.875, "rewards/chosen": 0.3290901780128479, "rewards/margins": 2.8720502853393555, "rewards/rejected": -2.5429601669311523, "step": 5918 }, { "epoch": 0.68, "learning_rate": 9.674587381481915e-08, "logits/chosen": -3.8332104682922363, "logits/rejected": -3.7381396293640137, "logps/chosen": -475.32342529296875, "logps/rejected": -338.2590637207031, "loss": 0.1894, "rewards/accuracies": 0.875, "rewards/chosen": 0.4330303966999054, "rewards/margins": 2.584691047668457, "rewards/rejected": -2.151660919189453, "step": 5919 }, { "epoch": 0.68, "learning_rate": 9.671075734519489e-08, "logits/chosen": -2.9704089164733887, "logits/rejected": -3.250443935394287, "logps/chosen": -267.57647705078125, "logps/rejected": -297.0458984375, "loss": 0.4947, "rewards/accuracies": 0.875, "rewards/chosen": 0.08477458357810974, "rewards/margins": 3.1051831245422363, "rewards/rejected": -3.0204086303710938, "step": 5920 }, { "epoch": 0.68, "learning_rate": 9.667564087557064e-08, "logits/chosen": -3.5071747303009033, "logits/rejected": -2.9166362285614014, "logps/chosen": -332.71337890625, "logps/rejected": -240.1144561767578, "loss": 0.2389, "rewards/accuracies": 1.0, "rewards/chosen": -0.17036943137645721, "rewards/margins": 2.0181031227111816, "rewards/rejected": -2.1884727478027344, "step": 5921 }, { "epoch": 0.68, "learning_rate": 9.664052440594638e-08, "logits/chosen": -2.5795063972473145, "logits/rejected": -2.89300537109375, "logps/chosen": -218.0670928955078, "logps/rejected": -286.3052673339844, "loss": 0.2606, "rewards/accuracies": 0.875, "rewards/chosen": 0.3743932843208313, "rewards/margins": 2.2185218334198, "rewards/rejected": -1.8441284894943237, "step": 5922 }, { "epoch": 0.68, "learning_rate": 9.660540793632214e-08, "logits/chosen": -2.8650870323181152, "logits/rejected": -2.709016799926758, "logps/chosen": -167.69749450683594, "logps/rejected": -190.02243041992188, "loss": 0.2947, "rewards/accuracies": 0.875, "rewards/chosen": 0.17263279855251312, "rewards/margins": 1.8255832195281982, "rewards/rejected": -1.6529505252838135, "step": 5923 }, { "epoch": 0.68, "learning_rate": 9.657029146669788e-08, "logits/chosen": -3.473989963531494, "logits/rejected": -3.223184823989868, "logps/chosen": -302.7011413574219, "logps/rejected": -251.4551239013672, "loss": 0.1799, "rewards/accuracies": 1.0, "rewards/chosen": 0.08136235177516937, "rewards/margins": 2.0978527069091797, "rewards/rejected": -2.0164904594421387, "step": 5924 }, { "epoch": 0.68, "learning_rate": 9.653517499707362e-08, "logits/chosen": -2.3540353775024414, "logits/rejected": -2.541011333465576, "logps/chosen": -427.01348876953125, "logps/rejected": -277.15411376953125, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": 0.01970088481903076, "rewards/margins": 1.7999213933944702, "rewards/rejected": -1.780220627784729, "step": 5925 }, { "epoch": 0.68, "learning_rate": 9.650005852744936e-08, "logits/chosen": -3.4029996395111084, "logits/rejected": -3.131751537322998, "logps/chosen": -162.26498413085938, "logps/rejected": -198.1903076171875, "loss": 0.4036, "rewards/accuracies": 0.75, "rewards/chosen": -0.13549277186393738, "rewards/margins": 1.0555747747421265, "rewards/rejected": -1.1910674571990967, "step": 5926 }, { "epoch": 0.68, "learning_rate": 9.646494205782512e-08, "logits/chosen": -3.43620228767395, "logits/rejected": -3.667235851287842, "logps/chosen": -188.52598571777344, "logps/rejected": -162.92849731445312, "loss": 0.3915, "rewards/accuracies": 0.75, "rewards/chosen": -1.1242705583572388, "rewards/margins": 1.9364968538284302, "rewards/rejected": -3.060767412185669, "step": 5927 }, { "epoch": 0.68, "learning_rate": 9.642982558820087e-08, "logits/chosen": -3.048177719116211, "logits/rejected": -3.1151814460754395, "logps/chosen": -174.2582550048828, "logps/rejected": -237.03492736816406, "loss": 0.574, "rewards/accuracies": 0.5, "rewards/chosen": -0.04447430372238159, "rewards/margins": 1.0580216646194458, "rewards/rejected": -1.1024960279464722, "step": 5928 }, { "epoch": 0.68, "learning_rate": 9.63947091185766e-08, "logits/chosen": -2.8805556297302246, "logits/rejected": -2.976654529571533, "logps/chosen": -243.81417846679688, "logps/rejected": -308.7523193359375, "loss": 0.6357, "rewards/accuracies": 0.5, "rewards/chosen": -0.6192861795425415, "rewards/margins": 2.32954740524292, "rewards/rejected": -2.948833465576172, "step": 5929 }, { "epoch": 0.68, "learning_rate": 9.635959264895235e-08, "logits/chosen": -3.084739923477173, "logits/rejected": -2.819614887237549, "logps/chosen": -211.7991943359375, "logps/rejected": -267.7151184082031, "loss": 0.4732, "rewards/accuracies": 0.875, "rewards/chosen": -0.30180293321609497, "rewards/margins": 0.6858354806900024, "rewards/rejected": -0.9876383543014526, "step": 5930 }, { "epoch": 0.68, "learning_rate": 9.63244761793281e-08, "logits/chosen": -3.4549076557159424, "logits/rejected": -2.938642978668213, "logps/chosen": -284.46844482421875, "logps/rejected": -223.9385986328125, "loss": 0.6408, "rewards/accuracies": 0.625, "rewards/chosen": -0.7684244513511658, "rewards/margins": 1.2583074569702148, "rewards/rejected": -2.0267319679260254, "step": 5931 }, { "epoch": 0.68, "learning_rate": 9.628935970970385e-08, "logits/chosen": -2.3775734901428223, "logits/rejected": -2.565157651901245, "logps/chosen": -247.88426208496094, "logps/rejected": -168.5254364013672, "loss": 0.4352, "rewards/accuracies": 0.75, "rewards/chosen": -0.12840840220451355, "rewards/margins": 2.082530975341797, "rewards/rejected": -2.210939407348633, "step": 5932 }, { "epoch": 0.68, "learning_rate": 9.62542432400796e-08, "logits/chosen": -3.575594425201416, "logits/rejected": -3.0210154056549072, "logps/chosen": -633.0364990234375, "logps/rejected": -372.78143310546875, "loss": 0.498, "rewards/accuracies": 0.625, "rewards/chosen": -0.11086101830005646, "rewards/margins": 0.6086575984954834, "rewards/rejected": -0.7195186614990234, "step": 5933 }, { "epoch": 0.68, "learning_rate": 9.621912677045534e-08, "logits/chosen": -3.1979904174804688, "logits/rejected": -3.3008577823638916, "logps/chosen": -161.5148162841797, "logps/rejected": -230.66336059570312, "loss": 0.5103, "rewards/accuracies": 0.75, "rewards/chosen": 0.4200167655944824, "rewards/margins": 1.2811305522918701, "rewards/rejected": -0.8611137270927429, "step": 5934 }, { "epoch": 0.68, "learning_rate": 9.618401030083109e-08, "logits/chosen": -2.8864078521728516, "logits/rejected": -3.017204761505127, "logps/chosen": -291.59283447265625, "logps/rejected": -246.93670654296875, "loss": 0.377, "rewards/accuracies": 0.875, "rewards/chosen": -0.02375376597046852, "rewards/margins": 3.2055587768554688, "rewards/rejected": -3.2293124198913574, "step": 5935 }, { "epoch": 0.68, "learning_rate": 9.614889383120683e-08, "logits/chosen": -3.1528825759887695, "logits/rejected": -3.2759273052215576, "logps/chosen": -377.1658935546875, "logps/rejected": -342.70843505859375, "loss": 0.5658, "rewards/accuracies": 0.75, "rewards/chosen": 0.10226978361606598, "rewards/margins": 1.3240816593170166, "rewards/rejected": -1.2218118906021118, "step": 5936 }, { "epoch": 0.68, "learning_rate": 9.611377736158257e-08, "logits/chosen": -2.653878688812256, "logits/rejected": -2.8724582195281982, "logps/chosen": -331.8513488769531, "logps/rejected": -279.42864990234375, "loss": 0.1838, "rewards/accuracies": 0.875, "rewards/chosen": 0.49364957213401794, "rewards/margins": 3.144731044769287, "rewards/rejected": -2.6510813236236572, "step": 5937 }, { "epoch": 0.68, "learning_rate": 9.607866089195832e-08, "logits/chosen": -3.3288676738739014, "logits/rejected": -3.400179147720337, "logps/chosen": -328.9051208496094, "logps/rejected": -266.5824279785156, "loss": 0.363, "rewards/accuracies": 0.75, "rewards/chosen": 0.11398360878229141, "rewards/margins": 1.3210585117340088, "rewards/rejected": -1.2070749998092651, "step": 5938 }, { "epoch": 0.68, "learning_rate": 9.604354442233408e-08, "logits/chosen": -3.4445133209228516, "logits/rejected": -3.1126811504364014, "logps/chosen": -277.7670593261719, "logps/rejected": -266.5044250488281, "loss": 0.1978, "rewards/accuracies": 0.875, "rewards/chosen": 0.6869149208068848, "rewards/margins": 3.1926522254943848, "rewards/rejected": -2.5057373046875, "step": 5939 }, { "epoch": 0.68, "learning_rate": 9.600842795270982e-08, "logits/chosen": -3.3059921264648438, "logits/rejected": -3.241529941558838, "logps/chosen": -264.0367126464844, "logps/rejected": -278.5604248046875, "loss": 0.7722, "rewards/accuracies": 0.75, "rewards/chosen": -0.6335735321044922, "rewards/margins": 0.4071235656738281, "rewards/rejected": -1.0406970977783203, "step": 5940 }, { "epoch": 0.68, "learning_rate": 9.597331148308556e-08, "logits/chosen": -2.3128836154937744, "logits/rejected": -2.287142753601074, "logps/chosen": -320.8565673828125, "logps/rejected": -315.33746337890625, "loss": 0.4496, "rewards/accuracies": 0.75, "rewards/chosen": -0.10860156267881393, "rewards/margins": 1.3183673620224, "rewards/rejected": -1.426969051361084, "step": 5941 }, { "epoch": 0.68, "learning_rate": 9.59381950134613e-08, "logits/chosen": -2.8769760131835938, "logits/rejected": -2.582190752029419, "logps/chosen": -285.3788757324219, "logps/rejected": -337.0663757324219, "loss": 0.3276, "rewards/accuracies": 0.875, "rewards/chosen": -0.08364415913820267, "rewards/margins": 3.229707717895508, "rewards/rejected": -3.313352108001709, "step": 5942 }, { "epoch": 0.69, "learning_rate": 9.590307854383707e-08, "logits/chosen": -2.9695239067077637, "logits/rejected": -2.7692606449127197, "logps/chosen": -88.38555145263672, "logps/rejected": -218.48121643066406, "loss": 0.3564, "rewards/accuracies": 0.875, "rewards/chosen": -0.17113028466701508, "rewards/margins": 1.4706065654754639, "rewards/rejected": -1.6417369842529297, "step": 5943 }, { "epoch": 0.69, "learning_rate": 9.586796207421281e-08, "logits/chosen": -2.291576385498047, "logits/rejected": -2.209322690963745, "logps/chosen": -343.2523498535156, "logps/rejected": -192.08343505859375, "loss": 0.3576, "rewards/accuracies": 0.875, "rewards/chosen": 0.022579893469810486, "rewards/margins": 1.2723634243011475, "rewards/rejected": -1.2497835159301758, "step": 5944 }, { "epoch": 0.69, "learning_rate": 9.583284560458855e-08, "logits/chosen": -4.107910633087158, "logits/rejected": -3.79162859916687, "logps/chosen": -476.7855529785156, "logps/rejected": -220.15277099609375, "loss": 0.3381, "rewards/accuracies": 0.875, "rewards/chosen": -0.906489372253418, "rewards/margins": 2.443168878555298, "rewards/rejected": -3.349658489227295, "step": 5945 }, { "epoch": 0.69, "learning_rate": 9.579772913496429e-08, "logits/chosen": -4.056990623474121, "logits/rejected": -3.42629337310791, "logps/chosen": -441.8648681640625, "logps/rejected": -270.5398254394531, "loss": 0.5003, "rewards/accuracies": 0.875, "rewards/chosen": -0.1516563445329666, "rewards/margins": 1.85740327835083, "rewards/rejected": -2.0090596675872803, "step": 5946 }, { "epoch": 0.69, "learning_rate": 9.576261266534004e-08, "logits/chosen": -3.0301432609558105, "logits/rejected": -2.650777578353882, "logps/chosen": -135.35862731933594, "logps/rejected": -297.5038146972656, "loss": 0.4611, "rewards/accuracies": 0.625, "rewards/chosen": -0.45772993564605713, "rewards/margins": 1.2519938945770264, "rewards/rejected": -1.7097238302230835, "step": 5947 }, { "epoch": 0.69, "learning_rate": 9.572749619571578e-08, "logits/chosen": -3.1922833919525146, "logits/rejected": -3.2663650512695312, "logps/chosen": -252.80966186523438, "logps/rejected": -242.44398498535156, "loss": 0.3175, "rewards/accuracies": 0.75, "rewards/chosen": -0.09758926182985306, "rewards/margins": 2.984285831451416, "rewards/rejected": -3.0818748474121094, "step": 5948 }, { "epoch": 0.69, "learning_rate": 9.569237972609154e-08, "logits/chosen": -3.6628801822662354, "logits/rejected": -3.0616745948791504, "logps/chosen": -433.5910339355469, "logps/rejected": -333.8158874511719, "loss": 0.4609, "rewards/accuracies": 0.75, "rewards/chosen": -0.653103232383728, "rewards/margins": 1.9750224351882935, "rewards/rejected": -2.6281256675720215, "step": 5949 }, { "epoch": 0.69, "learning_rate": 9.565726325646728e-08, "logits/chosen": -3.2926175594329834, "logits/rejected": -3.673074722290039, "logps/chosen": -427.3772277832031, "logps/rejected": -219.54449462890625, "loss": 0.8796, "rewards/accuracies": 0.625, "rewards/chosen": -1.043566107749939, "rewards/margins": 0.2650068402290344, "rewards/rejected": -1.308572769165039, "step": 5950 }, { "epoch": 0.69, "learning_rate": 9.562214678684303e-08, "logits/chosen": -2.7589492797851562, "logits/rejected": -2.8663904666900635, "logps/chosen": -93.24149322509766, "logps/rejected": -184.89462280273438, "loss": 0.3335, "rewards/accuracies": 0.875, "rewards/chosen": -0.10645896196365356, "rewards/margins": 1.707930326461792, "rewards/rejected": -1.8143892288208008, "step": 5951 }, { "epoch": 0.69, "learning_rate": 9.558703031721877e-08, "logits/chosen": -2.928055763244629, "logits/rejected": -2.813666343688965, "logps/chosen": -427.10235595703125, "logps/rejected": -371.71026611328125, "loss": 0.8606, "rewards/accuracies": 0.625, "rewards/chosen": -0.28058332204818726, "rewards/margins": -0.011145040392875671, "rewards/rejected": -0.26943832635879517, "step": 5952 }, { "epoch": 0.69, "learning_rate": 9.555191384759451e-08, "logits/chosen": -2.3540968894958496, "logits/rejected": -2.4005236625671387, "logps/chosen": -241.87937927246094, "logps/rejected": -217.00381469726562, "loss": 0.5694, "rewards/accuracies": 0.75, "rewards/chosen": -0.3503603935241699, "rewards/margins": 0.824764609336853, "rewards/rejected": -1.1751251220703125, "step": 5953 }, { "epoch": 0.69, "learning_rate": 9.551679737797025e-08, "logits/chosen": -3.3043580055236816, "logits/rejected": -3.236867666244507, "logps/chosen": -265.4062805175781, "logps/rejected": -286.9897766113281, "loss": 0.4298, "rewards/accuracies": 0.75, "rewards/chosen": 0.17976826429367065, "rewards/margins": 1.8546411991119385, "rewards/rejected": -1.6748731136322021, "step": 5954 }, { "epoch": 0.69, "learning_rate": 9.548168090834601e-08, "logits/chosen": -3.4081180095672607, "logits/rejected": -3.2437658309936523, "logps/chosen": -112.73483276367188, "logps/rejected": -170.04676818847656, "loss": 0.5379, "rewards/accuracies": 0.875, "rewards/chosen": -0.4003189206123352, "rewards/margins": 0.4541509747505188, "rewards/rejected": -0.8544698357582092, "step": 5955 }, { "epoch": 0.69, "learning_rate": 9.544656443872176e-08, "logits/chosen": -2.455901622772217, "logits/rejected": -2.83292818069458, "logps/chosen": -356.8276062011719, "logps/rejected": -307.1011047363281, "loss": 0.1457, "rewards/accuracies": 1.0, "rewards/chosen": 0.13973775506019592, "rewards/margins": 3.1099796295166016, "rewards/rejected": -2.9702420234680176, "step": 5956 }, { "epoch": 0.69, "learning_rate": 9.54114479690975e-08, "logits/chosen": -3.1703572273254395, "logits/rejected": -3.234567880630493, "logps/chosen": -237.1409149169922, "logps/rejected": -226.9910888671875, "loss": 0.2667, "rewards/accuracies": 0.875, "rewards/chosen": -0.007460739463567734, "rewards/margins": 3.1464643478393555, "rewards/rejected": -3.1539249420166016, "step": 5957 }, { "epoch": 0.69, "learning_rate": 9.537633149947324e-08, "logits/chosen": -3.0692288875579834, "logits/rejected": -3.1069350242614746, "logps/chosen": -327.1402282714844, "logps/rejected": -210.0907745361328, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": -0.10889193415641785, "rewards/margins": 2.741856336593628, "rewards/rejected": -2.8507485389709473, "step": 5958 }, { "epoch": 0.69, "learning_rate": 9.534121502984898e-08, "logits/chosen": -3.273867607116699, "logits/rejected": -3.134152412414551, "logps/chosen": -186.67947387695312, "logps/rejected": -208.53604125976562, "loss": 0.3716, "rewards/accuracies": 0.75, "rewards/chosen": 0.01019556075334549, "rewards/margins": 1.7677321434020996, "rewards/rejected": -1.75753653049469, "step": 5959 }, { "epoch": 0.69, "learning_rate": 9.530609856022475e-08, "logits/chosen": -3.6476054191589355, "logits/rejected": -3.098454475402832, "logps/chosen": -367.0373840332031, "logps/rejected": -192.65408325195312, "loss": 0.3221, "rewards/accuracies": 0.875, "rewards/chosen": -0.07294809818267822, "rewards/margins": 1.5632667541503906, "rewards/rejected": -1.6362148523330688, "step": 5960 }, { "epoch": 0.69, "learning_rate": 9.527098209060049e-08, "logits/chosen": -3.627934455871582, "logits/rejected": -3.026698112487793, "logps/chosen": -364.27777099609375, "logps/rejected": -220.4020233154297, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 0.1829012632369995, "rewards/margins": 2.846214532852173, "rewards/rejected": -2.6633129119873047, "step": 5961 }, { "epoch": 0.69, "learning_rate": 9.523586562097623e-08, "logits/chosen": -2.890294075012207, "logits/rejected": -3.3801653385162354, "logps/chosen": -393.558837890625, "logps/rejected": -366.5351257324219, "loss": 0.3105, "rewards/accuracies": 0.875, "rewards/chosen": -0.11497075855731964, "rewards/margins": 2.3215389251708984, "rewards/rejected": -2.436509609222412, "step": 5962 }, { "epoch": 0.69, "learning_rate": 9.520074915135197e-08, "logits/chosen": -3.2424323558807373, "logits/rejected": -3.2124691009521484, "logps/chosen": -414.4670104980469, "logps/rejected": -296.4359436035156, "loss": 0.4639, "rewards/accuracies": 0.625, "rewards/chosen": -0.6098196506500244, "rewards/margins": 0.8998351097106934, "rewards/rejected": -1.5096547603607178, "step": 5963 }, { "epoch": 0.69, "learning_rate": 9.516563268172773e-08, "logits/chosen": -2.8502423763275146, "logits/rejected": -2.6561310291290283, "logps/chosen": -245.5434112548828, "logps/rejected": -205.0363006591797, "loss": 0.8091, "rewards/accuracies": 0.625, "rewards/chosen": -1.0279731750488281, "rewards/margins": 0.11572159826755524, "rewards/rejected": -1.1436948776245117, "step": 5964 }, { "epoch": 0.69, "learning_rate": 9.513051621210347e-08, "logits/chosen": -3.727949857711792, "logits/rejected": -3.889094829559326, "logps/chosen": -218.58102416992188, "logps/rejected": -190.48703002929688, "loss": 0.4329, "rewards/accuracies": 0.75, "rewards/chosen": 0.2271902710199356, "rewards/margins": 1.7468241453170776, "rewards/rejected": -1.5196338891983032, "step": 5965 }, { "epoch": 0.69, "learning_rate": 9.509539974247922e-08, "logits/chosen": -2.908378839492798, "logits/rejected": -2.687326431274414, "logps/chosen": -341.0576477050781, "logps/rejected": -289.0843811035156, "loss": 0.2906, "rewards/accuracies": 0.875, "rewards/chosen": -0.11070290207862854, "rewards/margins": 1.4641095399856567, "rewards/rejected": -1.574812412261963, "step": 5966 }, { "epoch": 0.69, "learning_rate": 9.506028327285496e-08, "logits/chosen": -3.1482057571411133, "logits/rejected": -3.180713176727295, "logps/chosen": -320.7928161621094, "logps/rejected": -304.8851623535156, "loss": 0.6697, "rewards/accuracies": 0.75, "rewards/chosen": -0.4279455542564392, "rewards/margins": 0.6166769862174988, "rewards/rejected": -1.044622540473938, "step": 5967 }, { "epoch": 0.69, "learning_rate": 9.502516680323072e-08, "logits/chosen": -3.1852614879608154, "logits/rejected": -3.2287983894348145, "logps/chosen": -199.5655059814453, "logps/rejected": -288.56402587890625, "loss": 0.318, "rewards/accuracies": 0.875, "rewards/chosen": -0.46303778886795044, "rewards/margins": 3.264240264892578, "rewards/rejected": -3.727278232574463, "step": 5968 }, { "epoch": 0.69, "learning_rate": 9.499005033360646e-08, "logits/chosen": -3.3747456073760986, "logits/rejected": -3.229358196258545, "logps/chosen": -199.97312927246094, "logps/rejected": -149.7859344482422, "loss": 0.4828, "rewards/accuracies": 0.625, "rewards/chosen": -0.027496159076690674, "rewards/margins": 0.8731014132499695, "rewards/rejected": -0.9005975127220154, "step": 5969 }, { "epoch": 0.69, "learning_rate": 9.49549338639822e-08, "logits/chosen": -3.018251419067383, "logits/rejected": -2.964137554168701, "logps/chosen": -137.13800048828125, "logps/rejected": -291.6165771484375, "loss": 0.1982, "rewards/accuracies": 0.875, "rewards/chosen": 0.05087370052933693, "rewards/margins": 2.2845890522003174, "rewards/rejected": -2.233715534210205, "step": 5970 }, { "epoch": 0.69, "learning_rate": 9.491981739435794e-08, "logits/chosen": -2.632338285446167, "logits/rejected": -2.4255783557891846, "logps/chosen": -287.4134826660156, "logps/rejected": -372.23565673828125, "loss": 0.384, "rewards/accuracies": 0.75, "rewards/chosen": 0.16477300226688385, "rewards/margins": 2.271850109100342, "rewards/rejected": -2.10707688331604, "step": 5971 }, { "epoch": 0.69, "learning_rate": 9.48847009247337e-08, "logits/chosen": -3.5910515785217285, "logits/rejected": -3.3889665603637695, "logps/chosen": -286.16497802734375, "logps/rejected": -273.6270751953125, "loss": 0.1225, "rewards/accuracies": 1.0, "rewards/chosen": 0.4751468002796173, "rewards/margins": 2.9975359439849854, "rewards/rejected": -2.5223889350891113, "step": 5972 }, { "epoch": 0.69, "learning_rate": 9.484958445510944e-08, "logits/chosen": -3.203512668609619, "logits/rejected": -3.0651135444641113, "logps/chosen": -239.86280822753906, "logps/rejected": -188.34681701660156, "loss": 0.3033, "rewards/accuracies": 0.75, "rewards/chosen": 0.19208219647407532, "rewards/margins": 2.7583422660827637, "rewards/rejected": -2.5662600994110107, "step": 5973 }, { "epoch": 0.69, "learning_rate": 9.481446798548519e-08, "logits/chosen": -3.206904172897339, "logits/rejected": -3.29807710647583, "logps/chosen": -195.0992431640625, "logps/rejected": -221.79190063476562, "loss": 0.3215, "rewards/accuracies": 0.875, "rewards/chosen": -0.14370226860046387, "rewards/margins": 1.8223820924758911, "rewards/rejected": -1.966084361076355, "step": 5974 }, { "epoch": 0.69, "learning_rate": 9.477935151586093e-08, "logits/chosen": -2.550497531890869, "logits/rejected": -2.4239213466644287, "logps/chosen": -283.4840393066406, "logps/rejected": -208.97262573242188, "loss": 0.4547, "rewards/accuracies": 0.75, "rewards/chosen": -0.0545407235622406, "rewards/margins": 3.0212268829345703, "rewards/rejected": -3.0757675170898438, "step": 5975 }, { "epoch": 0.69, "learning_rate": 9.474423504623669e-08, "logits/chosen": -2.2942302227020264, "logits/rejected": -2.590351104736328, "logps/chosen": -208.0811004638672, "logps/rejected": -284.2514953613281, "loss": 0.2812, "rewards/accuracies": 0.75, "rewards/chosen": -0.30359911918640137, "rewards/margins": 1.8282805681228638, "rewards/rejected": -2.1318798065185547, "step": 5976 }, { "epoch": 0.69, "learning_rate": 9.470911857661243e-08, "logits/chosen": -3.417524814605713, "logits/rejected": -3.3184328079223633, "logps/chosen": -181.62054443359375, "logps/rejected": -167.8997802734375, "loss": 0.1773, "rewards/accuracies": 0.875, "rewards/chosen": 0.2917332947254181, "rewards/margins": 3.0698933601379395, "rewards/rejected": -2.7781600952148438, "step": 5977 }, { "epoch": 0.69, "learning_rate": 9.467400210698817e-08, "logits/chosen": -2.996770143508911, "logits/rejected": -3.0613954067230225, "logps/chosen": -221.32005310058594, "logps/rejected": -211.82766723632812, "loss": 0.1512, "rewards/accuracies": 1.0, "rewards/chosen": 0.3448389768600464, "rewards/margins": 3.677300453186035, "rewards/rejected": -3.3324618339538574, "step": 5978 }, { "epoch": 0.69, "learning_rate": 9.463888563736391e-08, "logits/chosen": -2.9890193939208984, "logits/rejected": -3.0928499698638916, "logps/chosen": -219.18643188476562, "logps/rejected": -272.91766357421875, "loss": 0.2358, "rewards/accuracies": 1.0, "rewards/chosen": 0.2659527063369751, "rewards/margins": 1.997556209564209, "rewards/rejected": -1.7316036224365234, "step": 5979 }, { "epoch": 0.69, "learning_rate": 9.460376916773967e-08, "logits/chosen": -2.972804546356201, "logits/rejected": -3.0036230087280273, "logps/chosen": -215.41224670410156, "logps/rejected": -234.08358764648438, "loss": 0.3777, "rewards/accuracies": 0.75, "rewards/chosen": 0.34834349155426025, "rewards/margins": 2.4757518768310547, "rewards/rejected": -2.127408266067505, "step": 5980 }, { "epoch": 0.69, "learning_rate": 9.456865269811541e-08, "logits/chosen": -2.8477730751037598, "logits/rejected": -2.952570676803589, "logps/chosen": -217.8678741455078, "logps/rejected": -234.38729858398438, "loss": 0.2196, "rewards/accuracies": 1.0, "rewards/chosen": 0.3277289569377899, "rewards/margins": 2.2701382637023926, "rewards/rejected": -1.9424091577529907, "step": 5981 }, { "epoch": 0.69, "learning_rate": 9.453353622849115e-08, "logits/chosen": -3.1859121322631836, "logits/rejected": -2.6044540405273438, "logps/chosen": -168.84429931640625, "logps/rejected": -102.67850494384766, "loss": 0.7698, "rewards/accuracies": 0.5, "rewards/chosen": -0.5869159698486328, "rewards/margins": 0.3254069685935974, "rewards/rejected": -0.912322998046875, "step": 5982 }, { "epoch": 0.69, "learning_rate": 9.44984197588669e-08, "logits/chosen": -3.081364154815674, "logits/rejected": -3.1458065509796143, "logps/chosen": -505.6863098144531, "logps/rejected": -444.40240478515625, "loss": 0.3087, "rewards/accuracies": 0.875, "rewards/chosen": 0.24603641033172607, "rewards/margins": 1.670236349105835, "rewards/rejected": -1.4241998195648193, "step": 5983 }, { "epoch": 0.69, "learning_rate": 9.446330328924266e-08, "logits/chosen": -2.8908116817474365, "logits/rejected": -3.1113739013671875, "logps/chosen": -227.50222778320312, "logps/rejected": -291.6514892578125, "loss": 0.2941, "rewards/accuracies": 0.875, "rewards/chosen": 0.023221462965011597, "rewards/margins": 2.510592460632324, "rewards/rejected": -2.4873709678649902, "step": 5984 }, { "epoch": 0.69, "learning_rate": 9.44281868196184e-08, "logits/chosen": -3.5340750217437744, "logits/rejected": -3.1018457412719727, "logps/chosen": -283.7449645996094, "logps/rejected": -168.56517028808594, "loss": 0.5044, "rewards/accuracies": 0.75, "rewards/chosen": -0.09705743938684464, "rewards/margins": 0.7430834770202637, "rewards/rejected": -0.8401408791542053, "step": 5985 }, { "epoch": 0.69, "learning_rate": 9.439307034999414e-08, "logits/chosen": -3.405971050262451, "logits/rejected": -3.4541094303131104, "logps/chosen": -216.0741424560547, "logps/rejected": -233.92881774902344, "loss": 0.2123, "rewards/accuracies": 1.0, "rewards/chosen": 0.2612106502056122, "rewards/margins": 2.4985809326171875, "rewards/rejected": -2.237370252609253, "step": 5986 }, { "epoch": 0.69, "learning_rate": 9.435795388036988e-08, "logits/chosen": -2.7367215156555176, "logits/rejected": -2.8680882453918457, "logps/chosen": -359.45538330078125, "logps/rejected": -428.4845275878906, "loss": 0.3563, "rewards/accuracies": 1.0, "rewards/chosen": 0.36730462312698364, "rewards/margins": 1.0211259126663208, "rewards/rejected": -0.6538212895393372, "step": 5987 }, { "epoch": 0.69, "learning_rate": 9.432283741074565e-08, "logits/chosen": -3.370603561401367, "logits/rejected": -3.1870555877685547, "logps/chosen": -267.0539245605469, "logps/rejected": -204.13748168945312, "loss": 1.4681, "rewards/accuracies": 0.875, "rewards/chosen": -1.4085843563079834, "rewards/margins": 0.45882683992385864, "rewards/rejected": -1.8674112558364868, "step": 5988 }, { "epoch": 0.69, "learning_rate": 9.428772094112139e-08, "logits/chosen": -3.0258140563964844, "logits/rejected": -3.222902297973633, "logps/chosen": -188.02671813964844, "logps/rejected": -228.3857879638672, "loss": 0.2306, "rewards/accuracies": 0.875, "rewards/chosen": 0.15379658341407776, "rewards/margins": 2.515307903289795, "rewards/rejected": -2.361511468887329, "step": 5989 }, { "epoch": 0.69, "learning_rate": 9.425260447149713e-08, "logits/chosen": -2.7538259029388428, "logits/rejected": -3.0795135498046875, "logps/chosen": -170.61883544921875, "logps/rejected": -269.06756591796875, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": -0.43979334831237793, "rewards/margins": 2.4787349700927734, "rewards/rejected": -2.9185280799865723, "step": 5990 }, { "epoch": 0.69, "learning_rate": 9.421748800187287e-08, "logits/chosen": -3.9834625720977783, "logits/rejected": -3.8164830207824707, "logps/chosen": -85.78785705566406, "logps/rejected": -95.08564758300781, "loss": 0.7604, "rewards/accuracies": 0.5, "rewards/chosen": -0.2367699146270752, "rewards/margins": 0.8676185011863708, "rewards/rejected": -1.1043884754180908, "step": 5991 }, { "epoch": 0.69, "learning_rate": 9.418237153224862e-08, "logits/chosen": -3.880265712738037, "logits/rejected": -3.884253740310669, "logps/chosen": -207.01966857910156, "logps/rejected": -230.61669921875, "loss": 0.6254, "rewards/accuracies": 0.5, "rewards/chosen": -0.684291422367096, "rewards/margins": 0.5095913410186768, "rewards/rejected": -1.193882703781128, "step": 5992 }, { "epoch": 0.69, "learning_rate": 9.414725506262438e-08, "logits/chosen": -2.5358402729034424, "logits/rejected": -2.477332592010498, "logps/chosen": -380.142333984375, "logps/rejected": -328.9883117675781, "loss": 0.3467, "rewards/accuracies": 0.75, "rewards/chosen": 0.27235597372055054, "rewards/margins": 1.8779215812683105, "rewards/rejected": -1.6055656671524048, "step": 5993 }, { "epoch": 0.69, "learning_rate": 9.411213859300012e-08, "logits/chosen": -2.611980438232422, "logits/rejected": -2.4021310806274414, "logps/chosen": -197.52935791015625, "logps/rejected": -399.71771240234375, "loss": 0.5582, "rewards/accuracies": 0.625, "rewards/chosen": -0.35566315054893494, "rewards/margins": 1.4032495021820068, "rewards/rejected": -1.7589126825332642, "step": 5994 }, { "epoch": 0.69, "learning_rate": 9.407702212337586e-08, "logits/chosen": -2.7740426063537598, "logits/rejected": -3.051816463470459, "logps/chosen": -207.21588134765625, "logps/rejected": -198.9026336669922, "loss": 0.2614, "rewards/accuracies": 0.875, "rewards/chosen": 0.09621091187000275, "rewards/margins": 2.38224720954895, "rewards/rejected": -2.286036252975464, "step": 5995 }, { "epoch": 0.69, "learning_rate": 9.404190565375161e-08, "logits/chosen": -3.5111777782440186, "logits/rejected": -3.4815497398376465, "logps/chosen": -124.25013732910156, "logps/rejected": -173.6617889404297, "loss": 0.2727, "rewards/accuracies": 1.0, "rewards/chosen": 0.45289158821105957, "rewards/margins": 1.5268901586532593, "rewards/rejected": -1.0739984512329102, "step": 5996 }, { "epoch": 0.69, "learning_rate": 9.400678918412735e-08, "logits/chosen": -2.5677411556243896, "logits/rejected": -2.632093667984009, "logps/chosen": -275.3363037109375, "logps/rejected": -223.22439575195312, "loss": 0.6761, "rewards/accuracies": 0.875, "rewards/chosen": -0.9366806745529175, "rewards/margins": 0.979651927947998, "rewards/rejected": -1.9163326025009155, "step": 5997 }, { "epoch": 0.69, "learning_rate": 9.397167271450309e-08, "logits/chosen": -3.3663253784179688, "logits/rejected": -3.0877747535705566, "logps/chosen": -350.88836669921875, "logps/rejected": -194.8474578857422, "loss": 0.3425, "rewards/accuracies": 0.875, "rewards/chosen": 0.17219853401184082, "rewards/margins": 1.412427544593811, "rewards/rejected": -1.2402288913726807, "step": 5998 }, { "epoch": 0.69, "learning_rate": 9.393655624487883e-08, "logits/chosen": -3.1187140941619873, "logits/rejected": -2.877419948577881, "logps/chosen": -169.95843505859375, "logps/rejected": -208.35560607910156, "loss": 0.2511, "rewards/accuracies": 1.0, "rewards/chosen": -0.22736942768096924, "rewards/margins": 2.379943609237671, "rewards/rejected": -2.6073129177093506, "step": 5999 }, { "epoch": 0.69, "learning_rate": 9.39014397752546e-08, "logits/chosen": -3.1150925159454346, "logits/rejected": -2.6974105834960938, "logps/chosen": -219.62974548339844, "logps/rejected": -166.36656188964844, "loss": 0.3898, "rewards/accuracies": 0.625, "rewards/chosen": -0.12611934542655945, "rewards/margins": 1.7597343921661377, "rewards/rejected": -1.88585364818573, "step": 6000 }, { "epoch": 0.69, "eval_logits/chosen": -2.840113639831543, "eval_logits/rejected": -2.8027396202087402, "eval_logps/chosen": -293.7511291503906, "eval_logps/rejected": -237.29771423339844, "eval_loss": 0.43171972036361694, "eval_rewards/accuracies": 0.8142856955528259, "eval_rewards/chosen": 0.030380776152014732, "eval_rewards/margins": 1.3258912563323975, "eval_rewards/rejected": -1.2955104112625122, "eval_runtime": 32.523, "eval_samples_per_second": 2.152, "eval_steps_per_second": 1.076, "step": 6000 }, { "epoch": 0.69, "learning_rate": 9.386632330563034e-08, "logits/chosen": -3.4166324138641357, "logits/rejected": -3.652947425842285, "logps/chosen": -166.6906280517578, "logps/rejected": -265.2337341308594, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": 0.3159804940223694, "rewards/margins": 2.301502227783203, "rewards/rejected": -1.985521674156189, "step": 6001 }, { "epoch": 0.69, "learning_rate": 9.383120683600608e-08, "logits/chosen": -2.8372294902801514, "logits/rejected": -2.4821035861968994, "logps/chosen": -299.708251953125, "logps/rejected": -344.3417663574219, "loss": 0.9895, "rewards/accuracies": 0.25, "rewards/chosen": -0.5646206140518188, "rewards/margins": -0.008068457245826721, "rewards/rejected": -0.5565521717071533, "step": 6002 }, { "epoch": 0.69, "learning_rate": 9.379609036638182e-08, "logits/chosen": -3.6709907054901123, "logits/rejected": -3.3763673305511475, "logps/chosen": -364.5740051269531, "logps/rejected": -223.08412170410156, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": -0.13678961992263794, "rewards/margins": 1.1738712787628174, "rewards/rejected": -1.3106608390808105, "step": 6003 }, { "epoch": 0.69, "learning_rate": 9.376097389675756e-08, "logits/chosen": -3.5174202919006348, "logits/rejected": -3.6510329246520996, "logps/chosen": -261.3431396484375, "logps/rejected": -204.43133544921875, "loss": 0.4096, "rewards/accuracies": 0.75, "rewards/chosen": 0.08903439342975616, "rewards/margins": 1.520330786705017, "rewards/rejected": -1.4312963485717773, "step": 6004 }, { "epoch": 0.69, "learning_rate": 9.372585742713333e-08, "logits/chosen": -2.7782506942749023, "logits/rejected": -3.3736166954040527, "logps/chosen": -232.96768188476562, "logps/rejected": -163.9185791015625, "loss": 0.4041, "rewards/accuracies": 0.75, "rewards/chosen": 0.02696201205253601, "rewards/margins": 2.0504517555236816, "rewards/rejected": -2.0234897136688232, "step": 6005 }, { "epoch": 0.69, "learning_rate": 9.369074095750907e-08, "logits/chosen": -2.974097490310669, "logits/rejected": -3.1402993202209473, "logps/chosen": -145.9790802001953, "logps/rejected": -258.89520263671875, "loss": 0.3086, "rewards/accuracies": 0.75, "rewards/chosen": -0.6514614820480347, "rewards/margins": 2.5739972591400146, "rewards/rejected": -3.2254586219787598, "step": 6006 }, { "epoch": 0.69, "learning_rate": 9.365562448788481e-08, "logits/chosen": -3.1007261276245117, "logits/rejected": -2.7689337730407715, "logps/chosen": -201.1221466064453, "logps/rejected": -228.416259765625, "loss": 0.3246, "rewards/accuracies": 0.875, "rewards/chosen": -0.2954845428466797, "rewards/margins": 1.5864882469177246, "rewards/rejected": -1.8819726705551147, "step": 6007 }, { "epoch": 0.69, "learning_rate": 9.362050801826055e-08, "logits/chosen": -3.7400693893432617, "logits/rejected": -3.3195958137512207, "logps/chosen": -357.97705078125, "logps/rejected": -224.19398498535156, "loss": 0.3045, "rewards/accuracies": 0.875, "rewards/chosen": 0.14970919489860535, "rewards/margins": 1.6260571479797363, "rewards/rejected": -1.476347804069519, "step": 6008 }, { "epoch": 0.69, "learning_rate": 9.35853915486363e-08, "logits/chosen": -3.2363712787628174, "logits/rejected": -3.261361598968506, "logps/chosen": -323.5926818847656, "logps/rejected": -309.9913024902344, "loss": 0.5962, "rewards/accuracies": 0.625, "rewards/chosen": -0.6718429327011108, "rewards/margins": 1.074379563331604, "rewards/rejected": -1.7462223768234253, "step": 6009 }, { "epoch": 0.69, "learning_rate": 9.355027507901206e-08, "logits/chosen": -3.067127227783203, "logits/rejected": -2.933277130126953, "logps/chosen": -349.05303955078125, "logps/rejected": -326.5354919433594, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": 0.11776372045278549, "rewards/margins": 2.3943281173706055, "rewards/rejected": -2.276564359664917, "step": 6010 }, { "epoch": 0.69, "learning_rate": 9.35151586093878e-08, "logits/chosen": -2.8958113193511963, "logits/rejected": -2.870753526687622, "logps/chosen": -325.14794921875, "logps/rejected": -208.59576416015625, "loss": 0.5354, "rewards/accuracies": 0.75, "rewards/chosen": -0.3097807765007019, "rewards/margins": 1.3124016523361206, "rewards/rejected": -1.6221823692321777, "step": 6011 }, { "epoch": 0.69, "learning_rate": 9.348004213976354e-08, "logits/chosen": -3.792510509490967, "logits/rejected": -3.6828527450561523, "logps/chosen": -228.98545837402344, "logps/rejected": -272.0115051269531, "loss": 0.3872, "rewards/accuracies": 0.875, "rewards/chosen": 0.047828566282987595, "rewards/margins": 2.398746967315674, "rewards/rejected": -2.3509182929992676, "step": 6012 }, { "epoch": 0.69, "learning_rate": 9.34449256701393e-08, "logits/chosen": -4.043623447418213, "logits/rejected": -3.8414411544799805, "logps/chosen": -168.51573181152344, "logps/rejected": -235.5950927734375, "loss": 0.37, "rewards/accuracies": 0.75, "rewards/chosen": 0.3390128016471863, "rewards/margins": 1.6986690759658813, "rewards/rejected": -1.3596562147140503, "step": 6013 }, { "epoch": 0.69, "learning_rate": 9.340980920051504e-08, "logits/chosen": -2.820889472961426, "logits/rejected": -2.903125524520874, "logps/chosen": -301.9021911621094, "logps/rejected": -285.06964111328125, "loss": 0.5357, "rewards/accuracies": 0.625, "rewards/chosen": -0.04051404446363449, "rewards/margins": 1.0360759496688843, "rewards/rejected": -1.0765900611877441, "step": 6014 }, { "epoch": 0.69, "learning_rate": 9.337469273089078e-08, "logits/chosen": -2.2991232872009277, "logits/rejected": -2.7903902530670166, "logps/chosen": -427.9427185058594, "logps/rejected": -405.8951110839844, "loss": 0.1182, "rewards/accuracies": 1.0, "rewards/chosen": 0.4940497875213623, "rewards/margins": 3.3739187717437744, "rewards/rejected": -2.879868984222412, "step": 6015 }, { "epoch": 0.69, "learning_rate": 9.333957626126652e-08, "logits/chosen": -3.739346981048584, "logits/rejected": -3.2264034748077393, "logps/chosen": -268.407470703125, "logps/rejected": -183.58482360839844, "loss": 0.3869, "rewards/accuracies": 0.75, "rewards/chosen": 0.18969745934009552, "rewards/margins": 1.2031750679016113, "rewards/rejected": -1.0134776830673218, "step": 6016 }, { "epoch": 0.69, "learning_rate": 9.330445979164228e-08, "logits/chosen": -2.258355140686035, "logits/rejected": -2.4010250568389893, "logps/chosen": -382.1170349121094, "logps/rejected": -228.71554565429688, "loss": 0.4252, "rewards/accuracies": 0.875, "rewards/chosen": 0.05198746919631958, "rewards/margins": 1.3811957836151123, "rewards/rejected": -1.3292083740234375, "step": 6017 }, { "epoch": 0.69, "learning_rate": 9.326934332201802e-08, "logits/chosen": -2.17256498336792, "logits/rejected": -2.321810245513916, "logps/chosen": -181.27728271484375, "logps/rejected": -142.69943237304688, "loss": 0.4074, "rewards/accuracies": 0.75, "rewards/chosen": -0.021023541688919067, "rewards/margins": 1.199907898902893, "rewards/rejected": -1.2209315299987793, "step": 6018 }, { "epoch": 0.69, "learning_rate": 9.323422685239376e-08, "logits/chosen": -3.2929561138153076, "logits/rejected": -3.318106174468994, "logps/chosen": -106.81647491455078, "logps/rejected": -252.80523681640625, "loss": 0.428, "rewards/accuracies": 0.75, "rewards/chosen": 0.5831893682479858, "rewards/margins": 2.2657158374786377, "rewards/rejected": -1.6825265884399414, "step": 6019 }, { "epoch": 0.69, "learning_rate": 9.31991103827695e-08, "logits/chosen": -2.964184284210205, "logits/rejected": -2.848526954650879, "logps/chosen": -291.1473388671875, "logps/rejected": -413.64776611328125, "loss": 0.219, "rewards/accuracies": 0.875, "rewards/chosen": 0.6281455159187317, "rewards/margins": 2.9774327278137207, "rewards/rejected": -2.349287271499634, "step": 6020 }, { "epoch": 0.69, "learning_rate": 9.316399391314527e-08, "logits/chosen": -3.2635743618011475, "logits/rejected": -2.6985838413238525, "logps/chosen": -340.9681091308594, "logps/rejected": -289.9105529785156, "loss": 0.4368, "rewards/accuracies": 0.875, "rewards/chosen": -0.42382657527923584, "rewards/margins": 1.9723930358886719, "rewards/rejected": -2.3962197303771973, "step": 6021 }, { "epoch": 0.69, "learning_rate": 9.312887744352101e-08, "logits/chosen": -2.9303948879241943, "logits/rejected": -2.678149700164795, "logps/chosen": -366.67620849609375, "logps/rejected": -285.82080078125, "loss": 0.4906, "rewards/accuracies": 0.75, "rewards/chosen": -0.39868390560150146, "rewards/margins": 1.9952020645141602, "rewards/rejected": -2.393886089324951, "step": 6022 }, { "epoch": 0.69, "learning_rate": 9.309376097389675e-08, "logits/chosen": -3.860288619995117, "logits/rejected": -3.5785651206970215, "logps/chosen": -299.2729187011719, "logps/rejected": -249.14332580566406, "loss": 0.3979, "rewards/accuracies": 0.75, "rewards/chosen": 0.3219054937362671, "rewards/margins": 1.4750010967254639, "rewards/rejected": -1.1530954837799072, "step": 6023 }, { "epoch": 0.69, "learning_rate": 9.30586445042725e-08, "logits/chosen": -3.0868611335754395, "logits/rejected": -3.164363145828247, "logps/chosen": -173.87757873535156, "logps/rejected": -183.06130981445312, "loss": 0.5535, "rewards/accuracies": 0.625, "rewards/chosen": 0.037305861711502075, "rewards/margins": 2.171280860900879, "rewards/rejected": -2.13397479057312, "step": 6024 }, { "epoch": 0.69, "learning_rate": 9.302352803464825e-08, "logits/chosen": -3.4106926918029785, "logits/rejected": -3.047288417816162, "logps/chosen": -320.242919921875, "logps/rejected": -192.03756713867188, "loss": 0.4767, "rewards/accuracies": 0.75, "rewards/chosen": -0.007163047790527344, "rewards/margins": 1.138852834701538, "rewards/rejected": -1.1460158824920654, "step": 6025 }, { "epoch": 0.69, "learning_rate": 9.298841156502399e-08, "logits/chosen": -3.3021469116210938, "logits/rejected": -3.368246078491211, "logps/chosen": -201.25120544433594, "logps/rejected": -191.1783447265625, "loss": 0.469, "rewards/accuracies": 0.625, "rewards/chosen": -0.06330351531505585, "rewards/margins": 1.2624703645706177, "rewards/rejected": -1.325774073600769, "step": 6026 }, { "epoch": 0.69, "learning_rate": 9.295329509539974e-08, "logits/chosen": -2.4582736492156982, "logits/rejected": -2.7870218753814697, "logps/chosen": -289.0015563964844, "logps/rejected": -260.88232421875, "loss": 0.2412, "rewards/accuracies": 0.875, "rewards/chosen": 0.42041948437690735, "rewards/margins": 2.8942625522613525, "rewards/rejected": -2.4738430976867676, "step": 6027 }, { "epoch": 0.69, "learning_rate": 9.291817862577548e-08, "logits/chosen": -2.6935644149780273, "logits/rejected": -2.7103610038757324, "logps/chosen": -236.2628173828125, "logps/rejected": -231.2149658203125, "loss": 0.4417, "rewards/accuracies": 0.625, "rewards/chosen": 0.11239853501319885, "rewards/margins": 1.3540091514587402, "rewards/rejected": -1.2416106462478638, "step": 6028 }, { "epoch": 0.7, "learning_rate": 9.288306215615124e-08, "logits/chosen": -3.088174819946289, "logits/rejected": -3.1170337200164795, "logps/chosen": -257.7061462402344, "logps/rejected": -415.2008056640625, "loss": 0.3826, "rewards/accuracies": 0.875, "rewards/chosen": 0.474758505821228, "rewards/margins": 1.8754202127456665, "rewards/rejected": -1.400661587715149, "step": 6029 }, { "epoch": 0.7, "learning_rate": 9.284794568652698e-08, "logits/chosen": -3.3040850162506104, "logits/rejected": -3.5361647605895996, "logps/chosen": -289.5772399902344, "logps/rejected": -251.05218505859375, "loss": 0.6128, "rewards/accuracies": 0.5, "rewards/chosen": -0.43226030468940735, "rewards/margins": 0.992901086807251, "rewards/rejected": -1.425161361694336, "step": 6030 }, { "epoch": 0.7, "learning_rate": 9.281282921690272e-08, "logits/chosen": -2.7515249252319336, "logits/rejected": -2.770698308944702, "logps/chosen": -187.07476806640625, "logps/rejected": -202.35443115234375, "loss": 0.6322, "rewards/accuracies": 0.875, "rewards/chosen": 0.01254485547542572, "rewards/margins": 0.9121779203414917, "rewards/rejected": -0.8996330499649048, "step": 6031 }, { "epoch": 0.7, "learning_rate": 9.277771274727846e-08, "logits/chosen": -3.8820619583129883, "logits/rejected": -3.919194221496582, "logps/chosen": -151.1553955078125, "logps/rejected": -144.10935974121094, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": 0.7183789610862732, "rewards/margins": 1.645146369934082, "rewards/rejected": -0.9267674088478088, "step": 6032 }, { "epoch": 0.7, "learning_rate": 9.274259627765423e-08, "logits/chosen": -3.0267701148986816, "logits/rejected": -2.7622506618499756, "logps/chosen": -343.67755126953125, "logps/rejected": -242.3269805908203, "loss": 0.169, "rewards/accuracies": 1.0, "rewards/chosen": -0.05399667099118233, "rewards/margins": 2.347836494445801, "rewards/rejected": -2.4018332958221436, "step": 6033 }, { "epoch": 0.7, "learning_rate": 9.270747980802997e-08, "logits/chosen": -2.440619468688965, "logits/rejected": -2.7685794830322266, "logps/chosen": -203.4610595703125, "logps/rejected": -275.51824951171875, "loss": 0.1753, "rewards/accuracies": 0.875, "rewards/chosen": 0.26984837651252747, "rewards/margins": 2.978186845779419, "rewards/rejected": -2.708338737487793, "step": 6034 }, { "epoch": 0.7, "learning_rate": 9.267236333840571e-08, "logits/chosen": -3.2240753173828125, "logits/rejected": -3.0250988006591797, "logps/chosen": -224.53692626953125, "logps/rejected": -279.63531494140625, "loss": 0.2316, "rewards/accuracies": 1.0, "rewards/chosen": 0.5075360536575317, "rewards/margins": 2.391237258911133, "rewards/rejected": -1.8837010860443115, "step": 6035 }, { "epoch": 0.7, "learning_rate": 9.263724686878145e-08, "logits/chosen": -3.7428970336914062, "logits/rejected": -3.705693483352661, "logps/chosen": -223.6096954345703, "logps/rejected": -271.5885009765625, "loss": 0.6361, "rewards/accuracies": 0.5, "rewards/chosen": -0.6892812252044678, "rewards/margins": 1.140865445137024, "rewards/rejected": -1.8301466703414917, "step": 6036 }, { "epoch": 0.7, "learning_rate": 9.26021303991572e-08, "logits/chosen": -3.426088571548462, "logits/rejected": -3.5995864868164062, "logps/chosen": -239.39895629882812, "logps/rejected": -218.89151000976562, "loss": 0.1863, "rewards/accuracies": 0.875, "rewards/chosen": 0.4651263952255249, "rewards/margins": 2.5338783264160156, "rewards/rejected": -2.0687522888183594, "step": 6037 }, { "epoch": 0.7, "learning_rate": 9.256701392953296e-08, "logits/chosen": -2.875925064086914, "logits/rejected": -3.063441276550293, "logps/chosen": -449.74029541015625, "logps/rejected": -288.1749572753906, "loss": 0.743, "rewards/accuracies": 0.75, "rewards/chosen": 0.09408295154571533, "rewards/margins": 1.5425708293914795, "rewards/rejected": -1.4484879970550537, "step": 6038 }, { "epoch": 0.7, "learning_rate": 9.25318974599087e-08, "logits/chosen": -2.52923321723938, "logits/rejected": -2.668139696121216, "logps/chosen": -254.533447265625, "logps/rejected": -269.9871520996094, "loss": 0.4486, "rewards/accuracies": 0.625, "rewards/chosen": -0.17552728950977325, "rewards/margins": 2.4032933712005615, "rewards/rejected": -2.5788207054138184, "step": 6039 }, { "epoch": 0.7, "learning_rate": 9.249678099028444e-08, "logits/chosen": -2.237473487854004, "logits/rejected": -2.6885571479797363, "logps/chosen": -649.3207397460938, "logps/rejected": -366.9460144042969, "loss": 0.1993, "rewards/accuracies": 1.0, "rewards/chosen": 0.1478407084941864, "rewards/margins": 2.377479076385498, "rewards/rejected": -2.2296383380889893, "step": 6040 }, { "epoch": 0.7, "learning_rate": 9.246166452066019e-08, "logits/chosen": -2.454895496368408, "logits/rejected": -2.4586822986602783, "logps/chosen": -404.7857666015625, "logps/rejected": -488.2228698730469, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": 0.38262176513671875, "rewards/margins": 3.266193389892578, "rewards/rejected": -2.8835718631744385, "step": 6041 }, { "epoch": 0.7, "learning_rate": 9.242654805103593e-08, "logits/chosen": -3.1079211235046387, "logits/rejected": -3.2922182083129883, "logps/chosen": -280.726806640625, "logps/rejected": -317.96234130859375, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 0.3380165100097656, "rewards/margins": 3.5078577995300293, "rewards/rejected": -3.1698412895202637, "step": 6042 }, { "epoch": 0.7, "learning_rate": 9.239143158141167e-08, "logits/chosen": -3.528200626373291, "logits/rejected": -3.416769504547119, "logps/chosen": -217.13360595703125, "logps/rejected": -288.1899719238281, "loss": 0.2145, "rewards/accuracies": 1.0, "rewards/chosen": -0.19072790443897247, "rewards/margins": 2.006965160369873, "rewards/rejected": -2.197693109512329, "step": 6043 }, { "epoch": 0.7, "learning_rate": 9.235631511178743e-08, "logits/chosen": -2.812445640563965, "logits/rejected": -2.6292426586151123, "logps/chosen": -414.078369140625, "logps/rejected": -199.8181610107422, "loss": 0.2985, "rewards/accuracies": 0.875, "rewards/chosen": -0.10433898866176605, "rewards/margins": 1.5319812297821045, "rewards/rejected": -1.6363203525543213, "step": 6044 }, { "epoch": 0.7, "learning_rate": 9.232119864216318e-08, "logits/chosen": -3.715461015701294, "logits/rejected": -3.743572235107422, "logps/chosen": -143.80255126953125, "logps/rejected": -159.6412811279297, "loss": 0.5846, "rewards/accuracies": 0.75, "rewards/chosen": -0.314277708530426, "rewards/margins": 0.7908638715744019, "rewards/rejected": -1.1051416397094727, "step": 6045 }, { "epoch": 0.7, "learning_rate": 9.228608217253892e-08, "logits/chosen": -2.667994499206543, "logits/rejected": -2.778740882873535, "logps/chosen": -418.58172607421875, "logps/rejected": -291.07830810546875, "loss": 0.3009, "rewards/accuracies": 1.0, "rewards/chosen": 0.04889379441738129, "rewards/margins": 1.8454307317733765, "rewards/rejected": -1.7965368032455444, "step": 6046 }, { "epoch": 0.7, "learning_rate": 9.225096570291466e-08, "logits/chosen": -3.1289799213409424, "logits/rejected": -3.312389850616455, "logps/chosen": -448.3828125, "logps/rejected": -415.6953125, "loss": 0.1706, "rewards/accuracies": 0.875, "rewards/chosen": 0.3021773099899292, "rewards/margins": 4.476931571960449, "rewards/rejected": -4.1747541427612305, "step": 6047 }, { "epoch": 0.7, "learning_rate": 9.22158492332904e-08, "logits/chosen": -2.8957228660583496, "logits/rejected": -2.8582935333251953, "logps/chosen": -239.32534790039062, "logps/rejected": -188.1544952392578, "loss": 0.5161, "rewards/accuracies": 0.625, "rewards/chosen": -0.14111265540122986, "rewards/margins": 0.7452707886695862, "rewards/rejected": -0.8863834142684937, "step": 6048 }, { "epoch": 0.7, "learning_rate": 9.218073276366617e-08, "logits/chosen": -3.3925716876983643, "logits/rejected": -3.2689249515533447, "logps/chosen": -142.191162109375, "logps/rejected": -148.77618408203125, "loss": 0.5908, "rewards/accuracies": 0.625, "rewards/chosen": -0.3154623508453369, "rewards/margins": 1.7119698524475098, "rewards/rejected": -2.0274322032928467, "step": 6049 }, { "epoch": 0.7, "learning_rate": 9.214561629404191e-08, "logits/chosen": -3.174285888671875, "logits/rejected": -3.196530818939209, "logps/chosen": -198.98968505859375, "logps/rejected": -266.7240295410156, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": 0.3481619358062744, "rewards/margins": 3.261443614959717, "rewards/rejected": -2.9132819175720215, "step": 6050 }, { "epoch": 0.7, "learning_rate": 9.211049982441765e-08, "logits/chosen": -2.906209945678711, "logits/rejected": -2.814199209213257, "logps/chosen": -437.9550476074219, "logps/rejected": -383.6017761230469, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": 0.000168532133102417, "rewards/margins": 2.364654302597046, "rewards/rejected": -2.364485740661621, "step": 6051 }, { "epoch": 0.7, "learning_rate": 9.207538335479339e-08, "logits/chosen": -2.0102009773254395, "logits/rejected": -2.229428768157959, "logps/chosen": -523.545166015625, "logps/rejected": -334.9476013183594, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": 0.5726315379142761, "rewards/margins": 3.4219021797180176, "rewards/rejected": -2.8492705821990967, "step": 6052 }, { "epoch": 0.7, "learning_rate": 9.204026688516913e-08, "logits/chosen": -3.15023136138916, "logits/rejected": -2.8303704261779785, "logps/chosen": -315.3748779296875, "logps/rejected": -290.06939697265625, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 0.26177504658699036, "rewards/margins": 4.159036159515381, "rewards/rejected": -3.897260904312134, "step": 6053 }, { "epoch": 0.7, "learning_rate": 9.200515041554488e-08, "logits/chosen": -3.0002810955047607, "logits/rejected": -2.9708199501037598, "logps/chosen": -136.59017944335938, "logps/rejected": -192.55068969726562, "loss": 0.3525, "rewards/accuracies": 0.75, "rewards/chosen": -0.26622360944747925, "rewards/margins": 2.1436069011688232, "rewards/rejected": -2.4098305702209473, "step": 6054 }, { "epoch": 0.7, "learning_rate": 9.197003394592064e-08, "logits/chosen": -2.811819553375244, "logits/rejected": -2.999253749847412, "logps/chosen": -246.32791137695312, "logps/rejected": -222.65985107421875, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": 0.22088705003261566, "rewards/margins": 1.7704548835754395, "rewards/rejected": -1.549567699432373, "step": 6055 }, { "epoch": 0.7, "learning_rate": 9.193491747629638e-08, "logits/chosen": -3.4330711364746094, "logits/rejected": -3.3735861778259277, "logps/chosen": -492.11517333984375, "logps/rejected": -312.5006103515625, "loss": 0.8913, "rewards/accuracies": 0.875, "rewards/chosen": -0.19686943292617798, "rewards/margins": 1.9728426933288574, "rewards/rejected": -2.1697120666503906, "step": 6056 }, { "epoch": 0.7, "learning_rate": 9.189980100667212e-08, "logits/chosen": -3.257622241973877, "logits/rejected": -3.226264476776123, "logps/chosen": -218.746337890625, "logps/rejected": -230.16815185546875, "loss": 0.2996, "rewards/accuracies": 0.875, "rewards/chosen": -0.0930292159318924, "rewards/margins": 1.5349969863891602, "rewards/rejected": -1.6280262470245361, "step": 6057 }, { "epoch": 0.7, "learning_rate": 9.186468453704787e-08, "logits/chosen": -2.836535930633545, "logits/rejected": -2.8195290565490723, "logps/chosen": -477.6898193359375, "logps/rejected": -306.9068603515625, "loss": 0.4027, "rewards/accuracies": 0.875, "rewards/chosen": 0.15680019557476044, "rewards/margins": 1.6322978734970093, "rewards/rejected": -1.4754977226257324, "step": 6058 }, { "epoch": 0.7, "learning_rate": 9.182956806742361e-08, "logits/chosen": -2.360788583755493, "logits/rejected": -2.4111762046813965, "logps/chosen": -238.1256103515625, "logps/rejected": -242.71107482910156, "loss": 0.7039, "rewards/accuracies": 0.625, "rewards/chosen": -0.5982565879821777, "rewards/margins": 1.0684376955032349, "rewards/rejected": -1.6666942834854126, "step": 6059 }, { "epoch": 0.7, "learning_rate": 9.179445159779936e-08, "logits/chosen": -3.192493438720703, "logits/rejected": -3.2156810760498047, "logps/chosen": -440.06573486328125, "logps/rejected": -320.5229797363281, "loss": 0.3079, "rewards/accuracies": 0.75, "rewards/chosen": -0.04629439115524292, "rewards/margins": 1.4159642457962036, "rewards/rejected": -1.4622585773468018, "step": 6060 }, { "epoch": 0.7, "learning_rate": 9.175933512817511e-08, "logits/chosen": -3.0336451530456543, "logits/rejected": -3.309845209121704, "logps/chosen": -248.6119384765625, "logps/rejected": -323.28802490234375, "loss": 0.2537, "rewards/accuracies": 0.875, "rewards/chosen": -0.6368128061294556, "rewards/margins": 2.2356953620910645, "rewards/rejected": -2.8725080490112305, "step": 6061 }, { "epoch": 0.7, "learning_rate": 9.172421865855086e-08, "logits/chosen": -3.4810075759887695, "logits/rejected": -3.614983558654785, "logps/chosen": -301.15960693359375, "logps/rejected": -296.7435302734375, "loss": 0.3215, "rewards/accuracies": 0.875, "rewards/chosen": -0.03429059684276581, "rewards/margins": 2.6086015701293945, "rewards/rejected": -2.6428921222686768, "step": 6062 }, { "epoch": 0.7, "learning_rate": 9.16891021889266e-08, "logits/chosen": -3.002271890640259, "logits/rejected": -3.244307041168213, "logps/chosen": -350.5118713378906, "logps/rejected": -315.4681091308594, "loss": 0.4497, "rewards/accuracies": 0.75, "rewards/chosen": -0.02590184658765793, "rewards/margins": 1.4249475002288818, "rewards/rejected": -1.4508495330810547, "step": 6063 }, { "epoch": 0.7, "learning_rate": 9.165398571930234e-08, "logits/chosen": -2.8498380184173584, "logits/rejected": -2.5940990447998047, "logps/chosen": -258.8536376953125, "logps/rejected": -524.382080078125, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": 0.41133415699005127, "rewards/margins": 3.3640987873077393, "rewards/rejected": -2.9527647495269775, "step": 6064 }, { "epoch": 0.7, "learning_rate": 9.161886924967808e-08, "logits/chosen": -2.536240339279175, "logits/rejected": -2.5002944469451904, "logps/chosen": -170.204345703125, "logps/rejected": -216.60829162597656, "loss": 0.3338, "rewards/accuracies": 0.75, "rewards/chosen": -0.0811450332403183, "rewards/margins": 1.2604748010635376, "rewards/rejected": -1.3416199684143066, "step": 6065 }, { "epoch": 0.7, "learning_rate": 9.158375278005385e-08, "logits/chosen": -3.03999400138855, "logits/rejected": -3.0646538734436035, "logps/chosen": -271.26519775390625, "logps/rejected": -309.0666198730469, "loss": 0.37, "rewards/accuracies": 0.875, "rewards/chosen": -0.2324630618095398, "rewards/margins": 1.3562512397766113, "rewards/rejected": -1.5887142419815063, "step": 6066 }, { "epoch": 0.7, "learning_rate": 9.154863631042959e-08, "logits/chosen": -2.418137788772583, "logits/rejected": -2.694409132003784, "logps/chosen": -330.63702392578125, "logps/rejected": -254.02572631835938, "loss": 0.3293, "rewards/accuracies": 0.75, "rewards/chosen": 0.019152171909809113, "rewards/margins": 1.9603618383407593, "rewards/rejected": -1.9412097930908203, "step": 6067 }, { "epoch": 0.7, "learning_rate": 9.151351984080533e-08, "logits/chosen": -3.2673165798187256, "logits/rejected": -3.0121593475341797, "logps/chosen": -356.43536376953125, "logps/rejected": -230.07467651367188, "loss": 0.2976, "rewards/accuracies": 0.75, "rewards/chosen": -0.21578852832317352, "rewards/margins": 2.2881555557250977, "rewards/rejected": -2.503943920135498, "step": 6068 }, { "epoch": 0.7, "learning_rate": 9.147840337118107e-08, "logits/chosen": -3.1592843532562256, "logits/rejected": -2.9387900829315186, "logps/chosen": -302.62225341796875, "logps/rejected": -250.63424682617188, "loss": 0.7148, "rewards/accuracies": 0.625, "rewards/chosen": -0.47849375009536743, "rewards/margins": 1.190877914428711, "rewards/rejected": -1.6693716049194336, "step": 6069 }, { "epoch": 0.7, "learning_rate": 9.144328690155683e-08, "logits/chosen": -2.6925301551818848, "logits/rejected": -2.781449317932129, "logps/chosen": -105.93038940429688, "logps/rejected": -203.29666137695312, "loss": 0.5998, "rewards/accuracies": 0.75, "rewards/chosen": -0.4699668288230896, "rewards/margins": 1.1532864570617676, "rewards/rejected": -1.6232531070709229, "step": 6070 }, { "epoch": 0.7, "learning_rate": 9.140817043193257e-08, "logits/chosen": -2.662412405014038, "logits/rejected": -2.913966417312622, "logps/chosen": -347.3652038574219, "logps/rejected": -334.6928405761719, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": 0.2717040777206421, "rewards/margins": 3.7210068702697754, "rewards/rejected": -3.449303150177002, "step": 6071 }, { "epoch": 0.7, "learning_rate": 9.137305396230832e-08, "logits/chosen": -3.0360212326049805, "logits/rejected": -2.959798812866211, "logps/chosen": -173.12088012695312, "logps/rejected": -232.25379943847656, "loss": 0.3678, "rewards/accuracies": 0.875, "rewards/chosen": 0.19053027033805847, "rewards/margins": 1.7906609773635864, "rewards/rejected": -1.6001307964324951, "step": 6072 }, { "epoch": 0.7, "learning_rate": 9.133793749268406e-08, "logits/chosen": -3.1001136302948, "logits/rejected": -2.922250270843506, "logps/chosen": -106.47547912597656, "logps/rejected": -194.431884765625, "loss": 0.3556, "rewards/accuracies": 0.75, "rewards/chosen": 0.3614736795425415, "rewards/margins": 1.6043440103530884, "rewards/rejected": -1.2428703308105469, "step": 6073 }, { "epoch": 0.7, "learning_rate": 9.130282102305982e-08, "logits/chosen": -3.559642791748047, "logits/rejected": -3.758878231048584, "logps/chosen": -331.7237854003906, "logps/rejected": -278.133544921875, "loss": 0.1923, "rewards/accuracies": 0.875, "rewards/chosen": 0.4249231219291687, "rewards/margins": 2.4996120929718018, "rewards/rejected": -2.0746891498565674, "step": 6074 }, { "epoch": 0.7, "learning_rate": 9.126770455343556e-08, "logits/chosen": -2.680734634399414, "logits/rejected": -2.8921382427215576, "logps/chosen": -414.3327941894531, "logps/rejected": -391.3150634765625, "loss": 0.354, "rewards/accuracies": 0.875, "rewards/chosen": -0.03231450915336609, "rewards/margins": 2.0791215896606445, "rewards/rejected": -2.111435890197754, "step": 6075 }, { "epoch": 0.7, "learning_rate": 9.12325880838113e-08, "logits/chosen": -2.6257877349853516, "logits/rejected": -2.8078393936157227, "logps/chosen": -281.26312255859375, "logps/rejected": -294.7520751953125, "loss": 0.367, "rewards/accuracies": 0.875, "rewards/chosen": 0.10890139639377594, "rewards/margins": 1.608031153678894, "rewards/rejected": -1.4991297721862793, "step": 6076 }, { "epoch": 0.7, "learning_rate": 9.119747161418704e-08, "logits/chosen": -2.4372124671936035, "logits/rejected": -2.6279287338256836, "logps/chosen": -238.24945068359375, "logps/rejected": -251.92041015625, "loss": 0.2267, "rewards/accuracies": 1.0, "rewards/chosen": 0.18097522854804993, "rewards/margins": 2.335618734359741, "rewards/rejected": -2.1546435356140137, "step": 6077 }, { "epoch": 0.7, "learning_rate": 9.11623551445628e-08, "logits/chosen": -3.894831657409668, "logits/rejected": -3.7625513076782227, "logps/chosen": -311.07421875, "logps/rejected": -251.59275817871094, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": 0.18871673941612244, "rewards/margins": 2.9271490573883057, "rewards/rejected": -2.7384324073791504, "step": 6078 }, { "epoch": 0.7, "learning_rate": 9.112723867493855e-08, "logits/chosen": -3.4402084350585938, "logits/rejected": -3.331296682357788, "logps/chosen": -435.89031982421875, "logps/rejected": -538.884033203125, "loss": 0.8706, "rewards/accuracies": 0.75, "rewards/chosen": -0.12662279605865479, "rewards/margins": 1.9657783508300781, "rewards/rejected": -2.0924010276794434, "step": 6079 }, { "epoch": 0.7, "learning_rate": 9.109212220531429e-08, "logits/chosen": -2.9836983680725098, "logits/rejected": -2.8109748363494873, "logps/chosen": -320.979736328125, "logps/rejected": -370.1586608886719, "loss": 0.4485, "rewards/accuracies": 0.75, "rewards/chosen": -0.4274188280105591, "rewards/margins": 1.5109072923660278, "rewards/rejected": -1.938326120376587, "step": 6080 }, { "epoch": 0.7, "learning_rate": 9.105700573569003e-08, "logits/chosen": -2.84674072265625, "logits/rejected": -3.2173678874969482, "logps/chosen": -213.95053100585938, "logps/rejected": -224.9061279296875, "loss": 0.4211, "rewards/accuracies": 0.75, "rewards/chosen": -0.8070627450942993, "rewards/margins": 1.4396345615386963, "rewards/rejected": -2.246697425842285, "step": 6081 }, { "epoch": 0.7, "learning_rate": 9.10218892660658e-08, "logits/chosen": -2.9770922660827637, "logits/rejected": -3.2050983905792236, "logps/chosen": -288.4244384765625, "logps/rejected": -356.51092529296875, "loss": 0.5225, "rewards/accuracies": 0.75, "rewards/chosen": -0.3454131484031677, "rewards/margins": 1.7057697772979736, "rewards/rejected": -2.051182985305786, "step": 6082 }, { "epoch": 0.7, "learning_rate": 9.098677279644153e-08, "logits/chosen": -2.998551368713379, "logits/rejected": -3.08260440826416, "logps/chosen": -398.1041259765625, "logps/rejected": -208.4000701904297, "loss": 0.4757, "rewards/accuracies": 0.875, "rewards/chosen": -0.3431411385536194, "rewards/margins": 0.9253236651420593, "rewards/rejected": -1.2684648036956787, "step": 6083 }, { "epoch": 0.7, "learning_rate": 9.095165632681728e-08, "logits/chosen": -3.5488781929016113, "logits/rejected": -3.564073085784912, "logps/chosen": -314.515869140625, "logps/rejected": -269.0707702636719, "loss": 0.1887, "rewards/accuracies": 0.875, "rewards/chosen": 0.31776294112205505, "rewards/margins": 3.636610984802246, "rewards/rejected": -3.318848133087158, "step": 6084 }, { "epoch": 0.7, "learning_rate": 9.091653985719302e-08, "logits/chosen": -3.2480010986328125, "logits/rejected": -3.6437478065490723, "logps/chosen": -228.5526885986328, "logps/rejected": -302.8901062011719, "loss": 0.2915, "rewards/accuracies": 0.875, "rewards/chosen": -0.004967305809259415, "rewards/margins": 2.253704786300659, "rewards/rejected": -2.2586722373962402, "step": 6085 }, { "epoch": 0.7, "learning_rate": 9.088142338756877e-08, "logits/chosen": -3.7899441719055176, "logits/rejected": -3.668755531311035, "logps/chosen": -149.9021453857422, "logps/rejected": -230.98294067382812, "loss": 0.3714, "rewards/accuracies": 0.75, "rewards/chosen": 0.3661889433860779, "rewards/margins": 1.84990656375885, "rewards/rejected": -1.483717679977417, "step": 6086 }, { "epoch": 0.7, "learning_rate": 9.084630691794451e-08, "logits/chosen": -3.3977017402648926, "logits/rejected": -3.2625198364257812, "logps/chosen": -262.5534362792969, "logps/rejected": -176.13699340820312, "loss": 0.4267, "rewards/accuracies": 0.75, "rewards/chosen": -0.4516794979572296, "rewards/margins": 1.8254063129425049, "rewards/rejected": -2.277086019515991, "step": 6087 }, { "epoch": 0.7, "learning_rate": 9.081119044832025e-08, "logits/chosen": -2.60337495803833, "logits/rejected": -2.8215227127075195, "logps/chosen": -313.906494140625, "logps/rejected": -337.74822998046875, "loss": 0.135, "rewards/accuracies": 1.0, "rewards/chosen": -0.2593235373497009, "rewards/margins": 2.7231969833374023, "rewards/rejected": -2.982520580291748, "step": 6088 }, { "epoch": 0.7, "learning_rate": 9.0776073978696e-08, "logits/chosen": -3.139949083328247, "logits/rejected": -2.6737875938415527, "logps/chosen": -291.984130859375, "logps/rejected": -229.77215576171875, "loss": 0.2828, "rewards/accuracies": 0.75, "rewards/chosen": 0.3035607635974884, "rewards/margins": 2.116612434387207, "rewards/rejected": -1.813051462173462, "step": 6089 }, { "epoch": 0.7, "learning_rate": 9.074095750907176e-08, "logits/chosen": -2.9340176582336426, "logits/rejected": -2.796393871307373, "logps/chosen": -282.9796142578125, "logps/rejected": -174.66146850585938, "loss": 0.232, "rewards/accuracies": 0.875, "rewards/chosen": 0.9376305341720581, "rewards/margins": 2.3397605419158936, "rewards/rejected": -1.4021300077438354, "step": 6090 }, { "epoch": 0.7, "learning_rate": 9.07058410394475e-08, "logits/chosen": -2.482470989227295, "logits/rejected": -2.5537121295928955, "logps/chosen": -347.8477478027344, "logps/rejected": -305.0164794921875, "loss": 0.4984, "rewards/accuracies": 0.75, "rewards/chosen": -0.04678768664598465, "rewards/margins": 0.8753702640533447, "rewards/rejected": -0.9221579432487488, "step": 6091 }, { "epoch": 0.7, "learning_rate": 9.067072456982324e-08, "logits/chosen": -3.0897862911224365, "logits/rejected": -2.987215518951416, "logps/chosen": -185.39601135253906, "logps/rejected": -264.1693115234375, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": 0.3489857017993927, "rewards/margins": 1.9779059886932373, "rewards/rejected": -1.6289201974868774, "step": 6092 }, { "epoch": 0.7, "learning_rate": 9.063560810019898e-08, "logits/chosen": -3.1046433448791504, "logits/rejected": -3.0982935428619385, "logps/chosen": -248.7295684814453, "logps/rejected": -208.10768127441406, "loss": 0.2503, "rewards/accuracies": 0.875, "rewards/chosen": 0.17266197502613068, "rewards/margins": 2.203451633453369, "rewards/rejected": -2.030789852142334, "step": 6093 }, { "epoch": 0.7, "learning_rate": 9.060049163057475e-08, "logits/chosen": -2.681684970855713, "logits/rejected": -2.47979474067688, "logps/chosen": -153.69595336914062, "logps/rejected": -297.994140625, "loss": 0.3043, "rewards/accuracies": 0.875, "rewards/chosen": -0.3144238591194153, "rewards/margins": 1.8866074085235596, "rewards/rejected": -2.20103120803833, "step": 6094 }, { "epoch": 0.7, "learning_rate": 9.056537516095049e-08, "logits/chosen": -3.5225517749786377, "logits/rejected": -3.409306526184082, "logps/chosen": -227.333740234375, "logps/rejected": -201.8513946533203, "loss": 0.4026, "rewards/accuracies": 0.875, "rewards/chosen": -0.4450843334197998, "rewards/margins": 1.2728307247161865, "rewards/rejected": -1.7179150581359863, "step": 6095 }, { "epoch": 0.7, "learning_rate": 9.053025869132623e-08, "logits/chosen": -3.0217068195343018, "logits/rejected": -2.8974878787994385, "logps/chosen": -369.67828369140625, "logps/rejected": -266.4863586425781, "loss": 0.6088, "rewards/accuracies": 0.5, "rewards/chosen": -0.5019599795341492, "rewards/margins": 2.0528693199157715, "rewards/rejected": -2.5548291206359863, "step": 6096 }, { "epoch": 0.7, "learning_rate": 9.049514222170197e-08, "logits/chosen": -2.9865059852600098, "logits/rejected": -3.0960254669189453, "logps/chosen": -209.69374084472656, "logps/rejected": -306.7557678222656, "loss": 0.2512, "rewards/accuracies": 0.875, "rewards/chosen": -0.30875837802886963, "rewards/margins": 1.9658623933792114, "rewards/rejected": -2.274620771408081, "step": 6097 }, { "epoch": 0.7, "learning_rate": 9.046002575207771e-08, "logits/chosen": -2.866668939590454, "logits/rejected": -3.0981502532958984, "logps/chosen": -104.46270751953125, "logps/rejected": -130.6959686279297, "loss": 0.2893, "rewards/accuracies": 1.0, "rewards/chosen": 0.45351600646972656, "rewards/margins": 1.7967004776000977, "rewards/rejected": -1.343184471130371, "step": 6098 }, { "epoch": 0.7, "learning_rate": 9.042490928245348e-08, "logits/chosen": -2.996884822845459, "logits/rejected": -3.12359619140625, "logps/chosen": -371.37310791015625, "logps/rejected": -319.3813171386719, "loss": 0.6709, "rewards/accuracies": 0.75, "rewards/chosen": 0.07408209145069122, "rewards/margins": 0.9064451456069946, "rewards/rejected": -0.8323631286621094, "step": 6099 }, { "epoch": 0.7, "learning_rate": 9.038979281282922e-08, "logits/chosen": -2.2885138988494873, "logits/rejected": -2.1659529209136963, "logps/chosen": -405.9176940917969, "logps/rejected": -312.5859375, "loss": 0.6116, "rewards/accuracies": 0.625, "rewards/chosen": 0.21587128937244415, "rewards/margins": 1.4609031677246094, "rewards/rejected": -1.2450318336486816, "step": 6100 }, { "epoch": 0.7, "learning_rate": 9.035467634320496e-08, "logits/chosen": -3.3562283515930176, "logits/rejected": -3.496826410293579, "logps/chosen": -114.05194854736328, "logps/rejected": -212.5638427734375, "loss": 0.3496, "rewards/accuracies": 1.0, "rewards/chosen": 0.04160606861114502, "rewards/margins": 2.619828939437866, "rewards/rejected": -2.5782227516174316, "step": 6101 }, { "epoch": 0.7, "learning_rate": 9.03195598735807e-08, "logits/chosen": -2.2846920490264893, "logits/rejected": -2.4456565380096436, "logps/chosen": -237.18800354003906, "logps/rejected": -292.17706298828125, "loss": 0.3169, "rewards/accuracies": 0.875, "rewards/chosen": 0.7717079520225525, "rewards/margins": 1.784788727760315, "rewards/rejected": -1.0130809545516968, "step": 6102 }, { "epoch": 0.7, "learning_rate": 9.028444340395645e-08, "logits/chosen": -3.280986785888672, "logits/rejected": -3.1609320640563965, "logps/chosen": -249.81687927246094, "logps/rejected": -246.35687255859375, "loss": 0.4159, "rewards/accuracies": 0.875, "rewards/chosen": -1.1320053339004517, "rewards/margins": 1.181520700454712, "rewards/rejected": -2.313525915145874, "step": 6103 }, { "epoch": 0.7, "learning_rate": 9.02493269343322e-08, "logits/chosen": -3.284933090209961, "logits/rejected": -3.030592918395996, "logps/chosen": -164.506103515625, "logps/rejected": -236.7853546142578, "loss": 0.6622, "rewards/accuracies": 0.5, "rewards/chosen": -0.5435166358947754, "rewards/margins": 0.7093006372451782, "rewards/rejected": -1.2528172731399536, "step": 6104 }, { "epoch": 0.7, "learning_rate": 9.021421046470793e-08, "logits/chosen": -3.058279037475586, "logits/rejected": -2.8571083545684814, "logps/chosen": -240.7840118408203, "logps/rejected": -259.4144592285156, "loss": 0.2783, "rewards/accuracies": 1.0, "rewards/chosen": 0.41717565059661865, "rewards/margins": 1.896039605140686, "rewards/rejected": -1.4788639545440674, "step": 6105 }, { "epoch": 0.7, "learning_rate": 9.017909399508369e-08, "logits/chosen": -3.3358988761901855, "logits/rejected": -3.0481863021850586, "logps/chosen": -160.75454711914062, "logps/rejected": -254.9085235595703, "loss": 0.6707, "rewards/accuracies": 0.75, "rewards/chosen": -0.39141929149627686, "rewards/margins": 0.4302743077278137, "rewards/rejected": -0.8216936588287354, "step": 6106 }, { "epoch": 0.7, "learning_rate": 9.014397752545944e-08, "logits/chosen": -2.9087164402008057, "logits/rejected": -3.0266458988189697, "logps/chosen": -302.02911376953125, "logps/rejected": -325.68829345703125, "loss": 0.6052, "rewards/accuracies": 0.75, "rewards/chosen": -0.3046901226043701, "rewards/margins": 0.6246770620346069, "rewards/rejected": -0.929367184638977, "step": 6107 }, { "epoch": 0.7, "learning_rate": 9.010886105583518e-08, "logits/chosen": -3.644380569458008, "logits/rejected": -3.3995068073272705, "logps/chosen": -371.10052490234375, "logps/rejected": -199.57525634765625, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": 0.4111597239971161, "rewards/margins": 1.5755419731140137, "rewards/rejected": -1.1643824577331543, "step": 6108 }, { "epoch": 0.7, "learning_rate": 9.007374458621092e-08, "logits/chosen": -2.690711498260498, "logits/rejected": -2.697741746902466, "logps/chosen": -398.8992919921875, "logps/rejected": -377.29412841796875, "loss": 0.3093, "rewards/accuracies": 0.875, "rewards/chosen": 0.3005514144897461, "rewards/margins": 2.216306686401367, "rewards/rejected": -1.915755033493042, "step": 6109 }, { "epoch": 0.7, "learning_rate": 9.003862811658666e-08, "logits/chosen": -3.6344897747039795, "logits/rejected": -3.5506889820098877, "logps/chosen": -255.87742614746094, "logps/rejected": -295.5720520019531, "loss": 0.487, "rewards/accuracies": 0.75, "rewards/chosen": 0.04624398052692413, "rewards/margins": 1.099831461906433, "rewards/rejected": -1.0535873174667358, "step": 6110 }, { "epoch": 0.7, "learning_rate": 9.000351164696243e-08, "logits/chosen": -2.8294291496276855, "logits/rejected": -3.3157291412353516, "logps/chosen": -252.84255981445312, "logps/rejected": -184.09959411621094, "loss": 0.3571, "rewards/accuracies": 0.875, "rewards/chosen": -0.3211233913898468, "rewards/margins": 1.4139246940612793, "rewards/rejected": -1.7350480556488037, "step": 6111 }, { "epoch": 0.7, "learning_rate": 8.996839517733817e-08, "logits/chosen": -3.0712387561798096, "logits/rejected": -2.8537580966949463, "logps/chosen": -310.1214904785156, "logps/rejected": -265.24652099609375, "loss": 0.5207, "rewards/accuracies": 0.75, "rewards/chosen": -0.20905832946300507, "rewards/margins": 1.4217853546142578, "rewards/rejected": -1.6308436393737793, "step": 6112 }, { "epoch": 0.7, "learning_rate": 8.993327870771391e-08, "logits/chosen": -2.9889256954193115, "logits/rejected": -3.0674004554748535, "logps/chosen": -374.1104431152344, "logps/rejected": -400.1053466796875, "loss": 0.3966, "rewards/accuracies": 0.875, "rewards/chosen": 0.31699153780937195, "rewards/margins": 1.2058945894241333, "rewards/rejected": -0.8889029622077942, "step": 6113 }, { "epoch": 0.7, "learning_rate": 8.989816223808965e-08, "logits/chosen": -2.2791781425476074, "logits/rejected": -2.2096753120422363, "logps/chosen": -345.59149169921875, "logps/rejected": -301.6259765625, "loss": 0.3407, "rewards/accuracies": 0.875, "rewards/chosen": -0.10151234269142151, "rewards/margins": 2.1730175018310547, "rewards/rejected": -2.2745299339294434, "step": 6114 }, { "epoch": 0.7, "learning_rate": 8.986304576846541e-08, "logits/chosen": -2.6969971656799316, "logits/rejected": -3.0195586681365967, "logps/chosen": -463.4600830078125, "logps/rejected": -379.70391845703125, "loss": 0.1347, "rewards/accuracies": 0.875, "rewards/chosen": 0.5338241457939148, "rewards/margins": 3.588310480117798, "rewards/rejected": -3.0544862747192383, "step": 6115 }, { "epoch": 0.71, "learning_rate": 8.982792929884116e-08, "logits/chosen": -3.080277681350708, "logits/rejected": -3.210265874862671, "logps/chosen": -147.17086791992188, "logps/rejected": -365.3971862792969, "loss": 0.6134, "rewards/accuracies": 0.5, "rewards/chosen": 0.26703941822052, "rewards/margins": 1.7488662004470825, "rewards/rejected": -1.4818267822265625, "step": 6116 }, { "epoch": 0.71, "learning_rate": 8.97928128292169e-08, "logits/chosen": -2.134443759918213, "logits/rejected": -2.0830495357513428, "logps/chosen": -232.095458984375, "logps/rejected": -318.3380126953125, "loss": 0.2995, "rewards/accuracies": 0.875, "rewards/chosen": -0.18115587532520294, "rewards/margins": 1.5603512525558472, "rewards/rejected": -1.7415071725845337, "step": 6117 }, { "epoch": 0.71, "learning_rate": 8.975769635959264e-08, "logits/chosen": -3.3733468055725098, "logits/rejected": -3.2877206802368164, "logps/chosen": -128.2617645263672, "logps/rejected": -211.00587463378906, "loss": 0.6468, "rewards/accuracies": 0.875, "rewards/chosen": -0.27508026361465454, "rewards/margins": 0.8277932405471802, "rewards/rejected": -1.1028735637664795, "step": 6118 }, { "epoch": 0.71, "learning_rate": 8.97225798899684e-08, "logits/chosen": -2.5232133865356445, "logits/rejected": -2.5417237281799316, "logps/chosen": -419.806884765625, "logps/rejected": -297.09686279296875, "loss": 0.44, "rewards/accuracies": 0.875, "rewards/chosen": 0.4827737808227539, "rewards/margins": 1.9735772609710693, "rewards/rejected": -1.4908034801483154, "step": 6119 }, { "epoch": 0.71, "learning_rate": 8.968746342034414e-08, "logits/chosen": -3.286860942840576, "logits/rejected": -3.2591984272003174, "logps/chosen": -239.66928100585938, "logps/rejected": -289.18499755859375, "loss": 0.4889, "rewards/accuracies": 0.625, "rewards/chosen": -0.2852742373943329, "rewards/margins": 2.460202217102051, "rewards/rejected": -2.745476484298706, "step": 6120 }, { "epoch": 0.71, "learning_rate": 8.965234695071988e-08, "logits/chosen": -2.5937063694000244, "logits/rejected": -2.5896732807159424, "logps/chosen": -244.20794677734375, "logps/rejected": -212.518798828125, "loss": 0.2006, "rewards/accuracies": 1.0, "rewards/chosen": 0.7949826717376709, "rewards/margins": 2.0723118782043457, "rewards/rejected": -1.2773292064666748, "step": 6121 }, { "epoch": 0.71, "learning_rate": 8.961723048109562e-08, "logits/chosen": -2.773634195327759, "logits/rejected": -2.57669734954834, "logps/chosen": -459.1122131347656, "logps/rejected": -309.68609619140625, "loss": 0.2538, "rewards/accuracies": 0.75, "rewards/chosen": 0.8535553216934204, "rewards/margins": 2.4536514282226562, "rewards/rejected": -1.6000959873199463, "step": 6122 }, { "epoch": 0.71, "learning_rate": 8.958211401147138e-08, "logits/chosen": -2.6778433322906494, "logits/rejected": -2.903172016143799, "logps/chosen": -240.40826416015625, "logps/rejected": -169.02305603027344, "loss": 0.4505, "rewards/accuracies": 0.75, "rewards/chosen": -0.2002800554037094, "rewards/margins": 1.076744556427002, "rewards/rejected": -1.277024507522583, "step": 6123 }, { "epoch": 0.71, "learning_rate": 8.954699754184713e-08, "logits/chosen": -2.9962313175201416, "logits/rejected": -3.462285041809082, "logps/chosen": -188.261962890625, "logps/rejected": -393.4789733886719, "loss": 0.4389, "rewards/accuracies": 0.875, "rewards/chosen": -0.011936947703361511, "rewards/margins": 1.7323399782180786, "rewards/rejected": -1.7442768812179565, "step": 6124 }, { "epoch": 0.71, "learning_rate": 8.951188107222287e-08, "logits/chosen": -3.1101598739624023, "logits/rejected": -2.8698458671569824, "logps/chosen": -201.03079223632812, "logps/rejected": -229.35275268554688, "loss": 0.4257, "rewards/accuracies": 0.875, "rewards/chosen": -0.5676238536834717, "rewards/margins": 1.1767336130142212, "rewards/rejected": -1.7443575859069824, "step": 6125 }, { "epoch": 0.71, "learning_rate": 8.94767646025986e-08, "logits/chosen": -2.6355433464050293, "logits/rejected": -2.564321517944336, "logps/chosen": -308.01080322265625, "logps/rejected": -357.89263916015625, "loss": 0.4064, "rewards/accuracies": 0.875, "rewards/chosen": -0.2250383496284485, "rewards/margins": 1.7554481029510498, "rewards/rejected": -1.9804866313934326, "step": 6126 }, { "epoch": 0.71, "learning_rate": 8.944164813297437e-08, "logits/chosen": -2.6624813079833984, "logits/rejected": -2.73696231842041, "logps/chosen": -335.05767822265625, "logps/rejected": -298.4790954589844, "loss": 0.3402, "rewards/accuracies": 0.875, "rewards/chosen": 0.28701743483543396, "rewards/margins": 1.7627724409103394, "rewards/rejected": -1.475754976272583, "step": 6127 }, { "epoch": 0.71, "learning_rate": 8.940653166335011e-08, "logits/chosen": -3.1313982009887695, "logits/rejected": -3.1614720821380615, "logps/chosen": -204.92051696777344, "logps/rejected": -254.1090850830078, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": -0.3639419674873352, "rewards/margins": 0.6081781387329102, "rewards/rejected": -0.9721200466156006, "step": 6128 }, { "epoch": 0.71, "learning_rate": 8.937141519372585e-08, "logits/chosen": -3.8979644775390625, "logits/rejected": -3.6847269535064697, "logps/chosen": -150.14810180664062, "logps/rejected": -215.3777618408203, "loss": 0.3198, "rewards/accuracies": 0.875, "rewards/chosen": -0.19504497945308685, "rewards/margins": 2.20904278755188, "rewards/rejected": -2.404087543487549, "step": 6129 }, { "epoch": 0.71, "learning_rate": 8.93362987241016e-08, "logits/chosen": -3.201587677001953, "logits/rejected": -3.0247626304626465, "logps/chosen": -218.52076721191406, "logps/rejected": -232.52304077148438, "loss": 0.216, "rewards/accuracies": 0.875, "rewards/chosen": 0.252737820148468, "rewards/margins": 2.2079148292541504, "rewards/rejected": -1.9551771879196167, "step": 6130 }, { "epoch": 0.71, "learning_rate": 8.930118225447735e-08, "logits/chosen": -3.1223573684692383, "logits/rejected": -2.919534683227539, "logps/chosen": -295.696044921875, "logps/rejected": -327.59649658203125, "loss": 0.35, "rewards/accuracies": 0.875, "rewards/chosen": 0.22886496782302856, "rewards/margins": 1.7905467748641968, "rewards/rejected": -1.5616817474365234, "step": 6131 }, { "epoch": 0.71, "learning_rate": 8.926606578485309e-08, "logits/chosen": -3.1395223140716553, "logits/rejected": -3.190736770629883, "logps/chosen": -326.79840087890625, "logps/rejected": -292.4430236816406, "loss": 0.4236, "rewards/accuracies": 0.75, "rewards/chosen": -0.0670311450958252, "rewards/margins": 1.5083199739456177, "rewards/rejected": -1.5753509998321533, "step": 6132 }, { "epoch": 0.71, "learning_rate": 8.923094931522884e-08, "logits/chosen": -2.655749559402466, "logits/rejected": -2.873871088027954, "logps/chosen": -337.5138244628906, "logps/rejected": -431.7476806640625, "loss": 0.4331, "rewards/accuracies": 0.75, "rewards/chosen": -0.21541815996170044, "rewards/margins": 2.878141164779663, "rewards/rejected": -3.093559503555298, "step": 6133 }, { "epoch": 0.71, "learning_rate": 8.919583284560458e-08, "logits/chosen": -2.981325149536133, "logits/rejected": -2.8795838356018066, "logps/chosen": -158.6732940673828, "logps/rejected": -234.12741088867188, "loss": 0.533, "rewards/accuracies": 0.625, "rewards/chosen": 0.003499850630760193, "rewards/margins": 1.4250478744506836, "rewards/rejected": -1.4215480089187622, "step": 6134 }, { "epoch": 0.71, "learning_rate": 8.916071637598034e-08, "logits/chosen": -3.0649571418762207, "logits/rejected": -3.1563284397125244, "logps/chosen": -283.21856689453125, "logps/rejected": -261.4825439453125, "loss": 0.1963, "rewards/accuracies": 0.875, "rewards/chosen": 0.4392419755458832, "rewards/margins": 3.4560437202453613, "rewards/rejected": -3.0168018341064453, "step": 6135 }, { "epoch": 0.71, "learning_rate": 8.912559990635608e-08, "logits/chosen": -2.7408676147460938, "logits/rejected": -2.8689637184143066, "logps/chosen": -133.58380126953125, "logps/rejected": -320.807861328125, "loss": 0.3892, "rewards/accuracies": 0.75, "rewards/chosen": 0.054868340492248535, "rewards/margins": 1.4887454509735107, "rewards/rejected": -1.4338772296905518, "step": 6136 }, { "epoch": 0.71, "learning_rate": 8.909048343673182e-08, "logits/chosen": -2.889843463897705, "logits/rejected": -2.9238405227661133, "logps/chosen": -379.4001159667969, "logps/rejected": -417.7017517089844, "loss": 0.3091, "rewards/accuracies": 0.875, "rewards/chosen": -0.08037938177585602, "rewards/margins": 1.4220572710037231, "rewards/rejected": -1.5024367570877075, "step": 6137 }, { "epoch": 0.71, "learning_rate": 8.905536696710756e-08, "logits/chosen": -2.607058048248291, "logits/rejected": -2.584132194519043, "logps/chosen": -264.89532470703125, "logps/rejected": -255.6737518310547, "loss": 0.7552, "rewards/accuracies": 0.75, "rewards/chosen": -0.275716632604599, "rewards/margins": 0.8577706813812256, "rewards/rejected": -1.1334872245788574, "step": 6138 }, { "epoch": 0.71, "learning_rate": 8.902025049748333e-08, "logits/chosen": -2.8230292797088623, "logits/rejected": -2.7770655155181885, "logps/chosen": -413.58306884765625, "logps/rejected": -348.7840576171875, "loss": 0.1465, "rewards/accuracies": 1.0, "rewards/chosen": 0.17325459420681, "rewards/margins": 3.4127581119537354, "rewards/rejected": -3.2395036220550537, "step": 6139 }, { "epoch": 0.71, "learning_rate": 8.898513402785907e-08, "logits/chosen": -3.1502623558044434, "logits/rejected": -3.4130406379699707, "logps/chosen": -227.9558868408203, "logps/rejected": -237.7585906982422, "loss": 0.2468, "rewards/accuracies": 0.875, "rewards/chosen": 0.32965323328971863, "rewards/margins": 1.9935916662216187, "rewards/rejected": -1.6639385223388672, "step": 6140 }, { "epoch": 0.71, "learning_rate": 8.895001755823481e-08, "logits/chosen": -3.0306992530822754, "logits/rejected": -3.3390250205993652, "logps/chosen": -302.54498291015625, "logps/rejected": -367.52447509765625, "loss": 0.1801, "rewards/accuracies": 0.875, "rewards/chosen": -0.0363527312874794, "rewards/margins": 2.8550732135772705, "rewards/rejected": -2.8914263248443604, "step": 6141 }, { "epoch": 0.71, "learning_rate": 8.891490108861055e-08, "logits/chosen": -2.9788901805877686, "logits/rejected": -2.905273199081421, "logps/chosen": -196.69944763183594, "logps/rejected": -218.47891235351562, "loss": 0.4705, "rewards/accuracies": 0.625, "rewards/chosen": 0.17515115439891815, "rewards/margins": 1.062846302986145, "rewards/rejected": -0.8876950740814209, "step": 6142 }, { "epoch": 0.71, "learning_rate": 8.88797846189863e-08, "logits/chosen": -2.2676568031311035, "logits/rejected": -2.453977108001709, "logps/chosen": -240.12477111816406, "logps/rejected": -150.34104919433594, "loss": 0.9122, "rewards/accuracies": 0.625, "rewards/chosen": -0.6776915788650513, "rewards/margins": 0.30193406343460083, "rewards/rejected": -0.9796257019042969, "step": 6143 }, { "epoch": 0.71, "learning_rate": 8.884466814936206e-08, "logits/chosen": -3.252197742462158, "logits/rejected": -3.0953197479248047, "logps/chosen": -277.00811767578125, "logps/rejected": -140.44696044921875, "loss": 0.4484, "rewards/accuracies": 0.75, "rewards/chosen": -0.5606539845466614, "rewards/margins": 0.7531798481941223, "rewards/rejected": -1.3138338327407837, "step": 6144 }, { "epoch": 0.71, "learning_rate": 8.88095516797378e-08, "logits/chosen": -3.2424378395080566, "logits/rejected": -3.0616142749786377, "logps/chosen": -223.42874145507812, "logps/rejected": -232.1288604736328, "loss": 0.8824, "rewards/accuracies": 0.625, "rewards/chosen": -0.6102076768875122, "rewards/margins": 0.01983983814716339, "rewards/rejected": -0.6300475597381592, "step": 6145 }, { "epoch": 0.71, "learning_rate": 8.877443521011354e-08, "logits/chosen": -2.6053247451782227, "logits/rejected": -2.7176549434661865, "logps/chosen": -290.4808654785156, "logps/rejected": -174.80909729003906, "loss": 0.3841, "rewards/accuracies": 0.875, "rewards/chosen": -0.18050625920295715, "rewards/margins": 1.30686616897583, "rewards/rejected": -1.4873723983764648, "step": 6146 }, { "epoch": 0.71, "learning_rate": 8.873931874048928e-08, "logits/chosen": -3.060335636138916, "logits/rejected": -3.24824857711792, "logps/chosen": -217.21548461914062, "logps/rejected": -203.3995361328125, "loss": 0.4352, "rewards/accuracies": 0.875, "rewards/chosen": -0.35658836364746094, "rewards/margins": 1.695101022720337, "rewards/rejected": -2.051689386367798, "step": 6147 }, { "epoch": 0.71, "learning_rate": 8.870420227086503e-08, "logits/chosen": -3.42417573928833, "logits/rejected": -3.034993886947632, "logps/chosen": -342.2288513183594, "logps/rejected": -305.8694152832031, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": 0.4222422242164612, "rewards/margins": 3.2604455947875977, "rewards/rejected": -2.8382034301757812, "step": 6148 }, { "epoch": 0.71, "learning_rate": 8.866908580124077e-08, "logits/chosen": -3.7327561378479004, "logits/rejected": -3.2240078449249268, "logps/chosen": -448.0987243652344, "logps/rejected": -254.05499267578125, "loss": 0.2335, "rewards/accuracies": 0.875, "rewards/chosen": -0.0336490124464035, "rewards/margins": 3.5415101051330566, "rewards/rejected": -3.5751593112945557, "step": 6149 }, { "epoch": 0.71, "learning_rate": 8.863396933161653e-08, "logits/chosen": -3.5748047828674316, "logits/rejected": -3.8332791328430176, "logps/chosen": -251.20376586914062, "logps/rejected": -276.0435791015625, "loss": 0.6824, "rewards/accuracies": 0.875, "rewards/chosen": -0.024360299110412598, "rewards/margins": 2.2638607025146484, "rewards/rejected": -2.2882208824157715, "step": 6150 }, { "epoch": 0.71, "learning_rate": 8.859885286199227e-08, "logits/chosen": -2.514810562133789, "logits/rejected": -2.6988935470581055, "logps/chosen": -204.5736083984375, "logps/rejected": -319.25244140625, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": 0.4320363998413086, "rewards/margins": 3.1014516353607178, "rewards/rejected": -2.66941499710083, "step": 6151 }, { "epoch": 0.71, "learning_rate": 8.856373639236802e-08, "logits/chosen": -3.5355031490325928, "logits/rejected": -3.7244834899902344, "logps/chosen": -255.11334228515625, "logps/rejected": -233.78982543945312, "loss": 0.3693, "rewards/accuracies": 1.0, "rewards/chosen": 0.09391672164201736, "rewards/margins": 1.077737808227539, "rewards/rejected": -0.9838209748268127, "step": 6152 }, { "epoch": 0.71, "learning_rate": 8.852861992274376e-08, "logits/chosen": -3.375528335571289, "logits/rejected": -3.2704696655273438, "logps/chosen": -264.0948181152344, "logps/rejected": -181.7700653076172, "loss": 0.3765, "rewards/accuracies": 1.0, "rewards/chosen": -0.17755772173404694, "rewards/margins": 1.0674458742141724, "rewards/rejected": -1.2450034618377686, "step": 6153 }, { "epoch": 0.71, "learning_rate": 8.84935034531195e-08, "logits/chosen": -3.856771945953369, "logits/rejected": -3.809086322784424, "logps/chosen": -290.18695068359375, "logps/rejected": -214.68707275390625, "loss": 0.443, "rewards/accuracies": 0.875, "rewards/chosen": -0.7056518793106079, "rewards/margins": 2.2653257846832275, "rewards/rejected": -2.970977783203125, "step": 6154 }, { "epoch": 0.71, "learning_rate": 8.845838698349524e-08, "logits/chosen": -2.712367057800293, "logits/rejected": -2.7184433937072754, "logps/chosen": -290.42474365234375, "logps/rejected": -255.64605712890625, "loss": 0.5434, "rewards/accuracies": 0.625, "rewards/chosen": -0.6923716068267822, "rewards/margins": 0.5961600542068481, "rewards/rejected": -1.2885316610336304, "step": 6155 }, { "epoch": 0.71, "learning_rate": 8.842327051387101e-08, "logits/chosen": -2.879347324371338, "logits/rejected": -2.7579498291015625, "logps/chosen": -283.51519775390625, "logps/rejected": -303.186767578125, "loss": 0.374, "rewards/accuracies": 0.625, "rewards/chosen": -0.2077643871307373, "rewards/margins": 1.8576922416687012, "rewards/rejected": -2.0654566287994385, "step": 6156 }, { "epoch": 0.71, "learning_rate": 8.838815404424675e-08, "logits/chosen": -3.4113080501556396, "logits/rejected": -3.4759488105773926, "logps/chosen": -229.78366088867188, "logps/rejected": -269.4625549316406, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -0.05036620795726776, "rewards/margins": 2.6533043384552, "rewards/rejected": -2.7036707401275635, "step": 6157 }, { "epoch": 0.71, "learning_rate": 8.835303757462249e-08, "logits/chosen": -3.954777956008911, "logits/rejected": -3.623379707336426, "logps/chosen": -254.99591064453125, "logps/rejected": -221.50735473632812, "loss": 0.3809, "rewards/accuracies": 0.75, "rewards/chosen": 0.2972577214241028, "rewards/margins": 2.0790953636169434, "rewards/rejected": -1.7818377017974854, "step": 6158 }, { "epoch": 0.71, "learning_rate": 8.831792110499823e-08, "logits/chosen": -3.025874376296997, "logits/rejected": -2.8948991298675537, "logps/chosen": -143.67657470703125, "logps/rejected": -206.8068084716797, "loss": 0.4815, "rewards/accuracies": 0.625, "rewards/chosen": -0.4232417345046997, "rewards/margins": 1.232729434967041, "rewards/rejected": -1.6559711694717407, "step": 6159 }, { "epoch": 0.71, "learning_rate": 8.828280463537399e-08, "logits/chosen": -3.6685056686401367, "logits/rejected": -3.7329514026641846, "logps/chosen": -230.48435974121094, "logps/rejected": -164.97317504882812, "loss": 0.6512, "rewards/accuracies": 0.5, "rewards/chosen": 0.1573733538389206, "rewards/margins": 0.9867604970932007, "rewards/rejected": -0.8293873071670532, "step": 6160 }, { "epoch": 0.71, "learning_rate": 8.824768816574974e-08, "logits/chosen": -3.0399792194366455, "logits/rejected": -2.8784451484680176, "logps/chosen": -157.69497680664062, "logps/rejected": -330.81689453125, "loss": 0.5363, "rewards/accuracies": 0.625, "rewards/chosen": -0.2155860811471939, "rewards/margins": 0.6355610489845276, "rewards/rejected": -0.8511471152305603, "step": 6161 }, { "epoch": 0.71, "learning_rate": 8.821257169612548e-08, "logits/chosen": -2.9349751472473145, "logits/rejected": -2.7918920516967773, "logps/chosen": -83.40182495117188, "logps/rejected": -258.4324951171875, "loss": 0.2391, "rewards/accuracies": 0.875, "rewards/chosen": -0.02097063511610031, "rewards/margins": 2.0818381309509277, "rewards/rejected": -2.102808952331543, "step": 6162 }, { "epoch": 0.71, "learning_rate": 8.817745522650122e-08, "logits/chosen": -3.175665855407715, "logits/rejected": -3.029724597930908, "logps/chosen": -115.06820678710938, "logps/rejected": -189.20867919921875, "loss": 0.5028, "rewards/accuracies": 0.75, "rewards/chosen": -0.6200711727142334, "rewards/margins": 1.3493478298187256, "rewards/rejected": -1.9694191217422485, "step": 6163 }, { "epoch": 0.71, "learning_rate": 8.814233875687698e-08, "logits/chosen": -3.2401554584503174, "logits/rejected": -3.0954720973968506, "logps/chosen": -217.81976318359375, "logps/rejected": -209.472900390625, "loss": 0.3087, "rewards/accuracies": 0.75, "rewards/chosen": 0.5007895231246948, "rewards/margins": 1.5985934734344482, "rewards/rejected": -1.0978039503097534, "step": 6164 }, { "epoch": 0.71, "learning_rate": 8.810722228725272e-08, "logits/chosen": -3.7518508434295654, "logits/rejected": -3.8579142093658447, "logps/chosen": -126.11347961425781, "logps/rejected": -195.9139862060547, "loss": 0.3565, "rewards/accuracies": 0.75, "rewards/chosen": -0.012776359915733337, "rewards/margins": 1.644818902015686, "rewards/rejected": -1.657595157623291, "step": 6165 }, { "epoch": 0.71, "learning_rate": 8.807210581762846e-08, "logits/chosen": -3.57435941696167, "logits/rejected": -3.378826141357422, "logps/chosen": -199.06790161132812, "logps/rejected": -157.1637725830078, "loss": 0.4637, "rewards/accuracies": 0.875, "rewards/chosen": 0.030479609966278076, "rewards/margins": 1.3187211751937866, "rewards/rejected": -1.2882415056228638, "step": 6166 }, { "epoch": 0.71, "learning_rate": 8.80369893480042e-08, "logits/chosen": -2.6487364768981934, "logits/rejected": -2.5413601398468018, "logps/chosen": -336.662353515625, "logps/rejected": -294.89691162109375, "loss": 0.6296, "rewards/accuracies": 0.75, "rewards/chosen": -0.33858636021614075, "rewards/margins": 0.7050436735153198, "rewards/rejected": -1.0436300039291382, "step": 6167 }, { "epoch": 0.71, "learning_rate": 8.800187287837996e-08, "logits/chosen": -2.376983642578125, "logits/rejected": -2.4024558067321777, "logps/chosen": -262.71685791015625, "logps/rejected": -295.6997985839844, "loss": 0.2968, "rewards/accuracies": 0.875, "rewards/chosen": -0.2541394531726837, "rewards/margins": 2.597189426422119, "rewards/rejected": -2.8513288497924805, "step": 6168 }, { "epoch": 0.71, "learning_rate": 8.79667564087557e-08, "logits/chosen": -3.5652055740356445, "logits/rejected": -3.234449863433838, "logps/chosen": -258.85235595703125, "logps/rejected": -250.91078186035156, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.15751639008522034, "rewards/margins": 2.698093891143799, "rewards/rejected": -2.5405774116516113, "step": 6169 }, { "epoch": 0.71, "learning_rate": 8.793163993913145e-08, "logits/chosen": -2.5158329010009766, "logits/rejected": -3.010761022567749, "logps/chosen": -324.042724609375, "logps/rejected": -222.24356079101562, "loss": 0.5056, "rewards/accuracies": 0.625, "rewards/chosen": -0.28060489892959595, "rewards/margins": 1.1833617687225342, "rewards/rejected": -1.463966727256775, "step": 6170 }, { "epoch": 0.71, "learning_rate": 8.789652346950719e-08, "logits/chosen": -3.052309513092041, "logits/rejected": -2.790369987487793, "logps/chosen": -180.92266845703125, "logps/rejected": -339.5602722167969, "loss": 0.4108, "rewards/accuracies": 0.75, "rewards/chosen": 0.01766054332256317, "rewards/margins": 1.7705764770507812, "rewards/rejected": -1.7529160976409912, "step": 6171 }, { "epoch": 0.71, "learning_rate": 8.786140699988295e-08, "logits/chosen": -2.7410056591033936, "logits/rejected": -2.5396568775177, "logps/chosen": -240.76095581054688, "logps/rejected": -261.06524658203125, "loss": 0.2695, "rewards/accuracies": 0.875, "rewards/chosen": 0.43170684576034546, "rewards/margins": 1.4314970970153809, "rewards/rejected": -0.9997903108596802, "step": 6172 }, { "epoch": 0.71, "learning_rate": 8.78262905302587e-08, "logits/chosen": -2.5219969749450684, "logits/rejected": -2.393662691116333, "logps/chosen": -194.76675415039062, "logps/rejected": -259.1099853515625, "loss": 0.5426, "rewards/accuracies": 0.75, "rewards/chosen": -0.5503987669944763, "rewards/margins": 1.2846217155456543, "rewards/rejected": -1.8350204229354858, "step": 6173 }, { "epoch": 0.71, "learning_rate": 8.779117406063443e-08, "logits/chosen": -2.708181381225586, "logits/rejected": -3.340092658996582, "logps/chosen": -207.26126098632812, "logps/rejected": -194.3479461669922, "loss": 0.2872, "rewards/accuracies": 1.0, "rewards/chosen": -0.014198929071426392, "rewards/margins": 1.8059428930282593, "rewards/rejected": -1.8201419115066528, "step": 6174 }, { "epoch": 0.71, "learning_rate": 8.775605759101017e-08, "logits/chosen": -3.085787773132324, "logits/rejected": -3.1958682537078857, "logps/chosen": -252.85394287109375, "logps/rejected": -295.9222717285156, "loss": 0.2525, "rewards/accuracies": 0.875, "rewards/chosen": -0.19341155886650085, "rewards/margins": 2.4780240058898926, "rewards/rejected": -2.6714353561401367, "step": 6175 }, { "epoch": 0.71, "learning_rate": 8.772094112138593e-08, "logits/chosen": -2.4998817443847656, "logits/rejected": -2.8688364028930664, "logps/chosen": -333.1115417480469, "logps/rejected": -270.273681640625, "loss": 0.2764, "rewards/accuracies": 1.0, "rewards/chosen": 0.4241678714752197, "rewards/margins": 2.3558409214019775, "rewards/rejected": -1.9316731691360474, "step": 6176 }, { "epoch": 0.71, "learning_rate": 8.768582465176167e-08, "logits/chosen": -3.5257928371429443, "logits/rejected": -3.725550413131714, "logps/chosen": -217.21205139160156, "logps/rejected": -339.29620361328125, "loss": 0.4566, "rewards/accuracies": 0.75, "rewards/chosen": -0.6350345015525818, "rewards/margins": 2.7671358585357666, "rewards/rejected": -3.402170181274414, "step": 6177 }, { "epoch": 0.71, "learning_rate": 8.765070818213742e-08, "logits/chosen": -3.4178061485290527, "logits/rejected": -3.393282175064087, "logps/chosen": -178.123779296875, "logps/rejected": -173.11480712890625, "loss": 0.3419, "rewards/accuracies": 0.75, "rewards/chosen": 0.18634553253650665, "rewards/margins": 1.390342354774475, "rewards/rejected": -1.2039968967437744, "step": 6178 }, { "epoch": 0.71, "learning_rate": 8.761559171251316e-08, "logits/chosen": -3.253429889678955, "logits/rejected": -3.0460476875305176, "logps/chosen": -241.54005432128906, "logps/rejected": -206.118896484375, "loss": 0.3027, "rewards/accuracies": 0.75, "rewards/chosen": -0.08854848891496658, "rewards/margins": 1.6797508001327515, "rewards/rejected": -1.7682993412017822, "step": 6179 }, { "epoch": 0.71, "learning_rate": 8.758047524288892e-08, "logits/chosen": -3.504655361175537, "logits/rejected": -3.0748355388641357, "logps/chosen": -419.376220703125, "logps/rejected": -340.6866455078125, "loss": 0.2004, "rewards/accuracies": 1.0, "rewards/chosen": 0.36702775955200195, "rewards/margins": 2.025324583053589, "rewards/rejected": -1.658296823501587, "step": 6180 }, { "epoch": 0.71, "learning_rate": 8.754535877326466e-08, "logits/chosen": -3.2814135551452637, "logits/rejected": -3.247514247894287, "logps/chosen": -380.63897705078125, "logps/rejected": -357.9363098144531, "loss": 0.2018, "rewards/accuracies": 1.0, "rewards/chosen": -0.3790084719657898, "rewards/margins": 2.9755733013153076, "rewards/rejected": -3.354581832885742, "step": 6181 }, { "epoch": 0.71, "learning_rate": 8.75102423036404e-08, "logits/chosen": -3.279662847518921, "logits/rejected": -3.3672127723693848, "logps/chosen": -290.95379638671875, "logps/rejected": -213.62118530273438, "loss": 0.4995, "rewards/accuracies": 0.875, "rewards/chosen": -0.24473357200622559, "rewards/margins": 1.0393134355545044, "rewards/rejected": -1.28404700756073, "step": 6182 }, { "epoch": 0.71, "learning_rate": 8.747512583401614e-08, "logits/chosen": -3.0773234367370605, "logits/rejected": -2.968860149383545, "logps/chosen": -222.77908325195312, "logps/rejected": -196.96795654296875, "loss": 0.8737, "rewards/accuracies": 0.5, "rewards/chosen": -0.6918673515319824, "rewards/margins": 0.35349413752555847, "rewards/rejected": -1.0453613996505737, "step": 6183 }, { "epoch": 0.71, "learning_rate": 8.744000936439191e-08, "logits/chosen": -2.9693663120269775, "logits/rejected": -2.7879042625427246, "logps/chosen": -165.81851196289062, "logps/rejected": -255.0758819580078, "loss": 0.4798, "rewards/accuracies": 0.75, "rewards/chosen": 0.4908677339553833, "rewards/margins": 1.8091825246810913, "rewards/rejected": -1.318314790725708, "step": 6184 }, { "epoch": 0.71, "learning_rate": 8.740489289476765e-08, "logits/chosen": -3.6850407123565674, "logits/rejected": -3.1730594635009766, "logps/chosen": -253.9943084716797, "logps/rejected": -173.28248596191406, "loss": 0.4166, "rewards/accuracies": 0.875, "rewards/chosen": -0.36148929595947266, "rewards/margins": 1.256143569946289, "rewards/rejected": -1.6176327466964722, "step": 6185 }, { "epoch": 0.71, "learning_rate": 8.736977642514339e-08, "logits/chosen": -2.9665849208831787, "logits/rejected": -2.857121706008911, "logps/chosen": -361.24261474609375, "logps/rejected": -325.5003662109375, "loss": 0.2992, "rewards/accuracies": 0.75, "rewards/chosen": 0.011166885495185852, "rewards/margins": 2.113274574279785, "rewards/rejected": -2.1021080017089844, "step": 6186 }, { "epoch": 0.71, "learning_rate": 8.733465995551913e-08, "logits/chosen": -3.1119096279144287, "logits/rejected": -2.852790594100952, "logps/chosen": -334.0766296386719, "logps/rejected": -266.5248718261719, "loss": 0.3015, "rewards/accuracies": 0.875, "rewards/chosen": -0.016907820478081703, "rewards/margins": 1.9130665063858032, "rewards/rejected": -1.9299744367599487, "step": 6187 }, { "epoch": 0.71, "learning_rate": 8.729954348589488e-08, "logits/chosen": -3.106278419494629, "logits/rejected": -3.137249708175659, "logps/chosen": -245.6387481689453, "logps/rejected": -270.65606689453125, "loss": 0.6403, "rewards/accuracies": 0.625, "rewards/chosen": -0.34573179483413696, "rewards/margins": 0.9581091403961182, "rewards/rejected": -1.3038409948349, "step": 6188 }, { "epoch": 0.71, "learning_rate": 8.726442701627064e-08, "logits/chosen": -2.757159948348999, "logits/rejected": -2.5926108360290527, "logps/chosen": -253.1839599609375, "logps/rejected": -244.2945556640625, "loss": 0.2897, "rewards/accuracies": 0.875, "rewards/chosen": 0.5047574639320374, "rewards/margins": 2.1502790451049805, "rewards/rejected": -1.645521640777588, "step": 6189 }, { "epoch": 0.71, "learning_rate": 8.722931054664638e-08, "logits/chosen": -2.6531240940093994, "logits/rejected": -2.907480001449585, "logps/chosen": -288.8594970703125, "logps/rejected": -189.79974365234375, "loss": 0.5795, "rewards/accuracies": 0.75, "rewards/chosen": -0.09042418748140335, "rewards/margins": 0.909171462059021, "rewards/rejected": -0.999595582485199, "step": 6190 }, { "epoch": 0.71, "learning_rate": 8.719419407702212e-08, "logits/chosen": -2.966796398162842, "logits/rejected": -2.977308988571167, "logps/chosen": -190.50076293945312, "logps/rejected": -304.655517578125, "loss": 0.2148, "rewards/accuracies": 0.875, "rewards/chosen": 0.287906289100647, "rewards/margins": 3.5255918502807617, "rewards/rejected": -3.2376856803894043, "step": 6191 }, { "epoch": 0.71, "learning_rate": 8.715907760739787e-08, "logits/chosen": -3.077587842941284, "logits/rejected": -2.901366710662842, "logps/chosen": -322.4462585449219, "logps/rejected": -269.46905517578125, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": 0.16929081082344055, "rewards/margins": 2.348426103591919, "rewards/rejected": -2.179135322570801, "step": 6192 }, { "epoch": 0.71, "learning_rate": 8.712396113777361e-08, "logits/chosen": -3.0393290519714355, "logits/rejected": -3.0820765495300293, "logps/chosen": -203.51788330078125, "logps/rejected": -298.24884033203125, "loss": 0.3793, "rewards/accuracies": 0.875, "rewards/chosen": -0.2176513969898224, "rewards/margins": 1.2252179384231567, "rewards/rejected": -1.4428694248199463, "step": 6193 }, { "epoch": 0.71, "learning_rate": 8.708884466814935e-08, "logits/chosen": -3.2148985862731934, "logits/rejected": -2.8627443313598633, "logps/chosen": -303.32464599609375, "logps/rejected": -160.66232299804688, "loss": 0.2389, "rewards/accuracies": 1.0, "rewards/chosen": 0.2496528923511505, "rewards/margins": 1.8236502408981323, "rewards/rejected": -1.5739972591400146, "step": 6194 }, { "epoch": 0.71, "learning_rate": 8.70537281985251e-08, "logits/chosen": -3.4498565196990967, "logits/rejected": -3.2906782627105713, "logps/chosen": -71.97164916992188, "logps/rejected": -152.67689514160156, "loss": 0.2659, "rewards/accuracies": 0.875, "rewards/chosen": 0.24974283576011658, "rewards/margins": 2.064680576324463, "rewards/rejected": -1.8149378299713135, "step": 6195 }, { "epoch": 0.71, "learning_rate": 8.701861172890085e-08, "logits/chosen": -2.5266380310058594, "logits/rejected": -2.8225021362304688, "logps/chosen": -277.5642395019531, "logps/rejected": -407.37811279296875, "loss": 0.374, "rewards/accuracies": 0.625, "rewards/chosen": -0.12147662043571472, "rewards/margins": 1.9361413717269897, "rewards/rejected": -2.0576179027557373, "step": 6196 }, { "epoch": 0.71, "learning_rate": 8.69834952592766e-08, "logits/chosen": -3.1101553440093994, "logits/rejected": -2.8499412536621094, "logps/chosen": -276.3446044921875, "logps/rejected": -226.96554565429688, "loss": 0.6328, "rewards/accuracies": 0.75, "rewards/chosen": 0.02519528567790985, "rewards/margins": 0.7795652151107788, "rewards/rejected": -0.754369854927063, "step": 6197 }, { "epoch": 0.71, "learning_rate": 8.694837878965234e-08, "logits/chosen": -3.1416380405426025, "logits/rejected": -3.5125246047973633, "logps/chosen": -163.97061157226562, "logps/rejected": -210.718994140625, "loss": 0.2979, "rewards/accuracies": 0.875, "rewards/chosen": -0.3095620274543762, "rewards/margins": 1.8749065399169922, "rewards/rejected": -2.1844682693481445, "step": 6198 }, { "epoch": 0.71, "learning_rate": 8.691326232002808e-08, "logits/chosen": -3.170940399169922, "logits/rejected": -3.0370469093322754, "logps/chosen": -160.14724731445312, "logps/rejected": -255.4941864013672, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": 0.1996043473482132, "rewards/margins": 2.291872978210449, "rewards/rejected": -2.092268466949463, "step": 6199 }, { "epoch": 0.71, "learning_rate": 8.687814585040382e-08, "logits/chosen": -2.6889164447784424, "logits/rejected": -2.549431800842285, "logps/chosen": -342.87017822265625, "logps/rejected": -300.8272399902344, "loss": 0.7314, "rewards/accuracies": 0.375, "rewards/chosen": -0.8079954385757446, "rewards/margins": 0.828865647315979, "rewards/rejected": -1.6368610858917236, "step": 6200 }, { "epoch": 0.71, "learning_rate": 8.684302938077959e-08, "logits/chosen": -2.9095349311828613, "logits/rejected": -3.0594093799591064, "logps/chosen": -171.49676513671875, "logps/rejected": -196.35467529296875, "loss": 0.3069, "rewards/accuracies": 1.0, "rewards/chosen": -0.0025969967246055603, "rewards/margins": 1.8880863189697266, "rewards/rejected": -1.8906834125518799, "step": 6201 }, { "epoch": 0.71, "learning_rate": 8.680791291115533e-08, "logits/chosen": -2.5593883991241455, "logits/rejected": -2.491218090057373, "logps/chosen": -253.13067626953125, "logps/rejected": -210.60482788085938, "loss": 0.5266, "rewards/accuracies": 0.75, "rewards/chosen": -0.33479252457618713, "rewards/margins": 1.5333582162857056, "rewards/rejected": -1.8681507110595703, "step": 6202 }, { "epoch": 0.72, "learning_rate": 8.677279644153107e-08, "logits/chosen": -2.3442609310150146, "logits/rejected": -2.632598876953125, "logps/chosen": -216.65208435058594, "logps/rejected": -233.930908203125, "loss": 0.2363, "rewards/accuracies": 0.875, "rewards/chosen": -0.16358736157417297, "rewards/margins": 2.069136142730713, "rewards/rejected": -2.2327234745025635, "step": 6203 }, { "epoch": 0.72, "learning_rate": 8.673767997190681e-08, "logits/chosen": -2.4515955448150635, "logits/rejected": -2.681119441986084, "logps/chosen": -201.57717895507812, "logps/rejected": -142.54989624023438, "loss": 0.2814, "rewards/accuracies": 0.875, "rewards/chosen": 0.3503835201263428, "rewards/margins": 2.1500415802001953, "rewards/rejected": -1.7996578216552734, "step": 6204 }, { "epoch": 0.72, "learning_rate": 8.670256350228257e-08, "logits/chosen": -2.764801502227783, "logits/rejected": -2.8507094383239746, "logps/chosen": -218.12623596191406, "logps/rejected": -267.4840087890625, "loss": 0.1816, "rewards/accuracies": 1.0, "rewards/chosen": -0.018078655004501343, "rewards/margins": 2.997767210006714, "rewards/rejected": -3.015845775604248, "step": 6205 }, { "epoch": 0.72, "learning_rate": 8.666744703265832e-08, "logits/chosen": -3.1756718158721924, "logits/rejected": -3.290731430053711, "logps/chosen": -148.37005615234375, "logps/rejected": -216.20755004882812, "loss": 0.4638, "rewards/accuracies": 0.875, "rewards/chosen": 0.6528363823890686, "rewards/margins": 1.7571951150894165, "rewards/rejected": -1.1043587923049927, "step": 6206 }, { "epoch": 0.72, "learning_rate": 8.663233056303406e-08, "logits/chosen": -3.293508529663086, "logits/rejected": -3.318122148513794, "logps/chosen": -280.07598876953125, "logps/rejected": -296.20123291015625, "loss": 0.5995, "rewards/accuracies": 0.75, "rewards/chosen": -0.6083521842956543, "rewards/margins": 1.3540289402008057, "rewards/rejected": -1.96238112449646, "step": 6207 }, { "epoch": 0.72, "learning_rate": 8.65972140934098e-08, "logits/chosen": -2.844909906387329, "logits/rejected": -2.9814465045928955, "logps/chosen": -339.649169921875, "logps/rejected": -375.01373291015625, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": 0.77976393699646, "rewards/margins": 3.383126974105835, "rewards/rejected": -2.603363037109375, "step": 6208 }, { "epoch": 0.72, "learning_rate": 8.656209762378555e-08, "logits/chosen": -3.177168369293213, "logits/rejected": -3.4636151790618896, "logps/chosen": -422.7376403808594, "logps/rejected": -260.71148681640625, "loss": 0.3494, "rewards/accuracies": 0.875, "rewards/chosen": -0.3571590185165405, "rewards/margins": 2.2169976234436035, "rewards/rejected": -2.5741565227508545, "step": 6209 }, { "epoch": 0.72, "learning_rate": 8.65269811541613e-08, "logits/chosen": -3.0544073581695557, "logits/rejected": -3.4044296741485596, "logps/chosen": -250.47122192382812, "logps/rejected": -245.9281768798828, "loss": 0.3694, "rewards/accuracies": 0.875, "rewards/chosen": -0.056384071707725525, "rewards/margins": 1.635916829109192, "rewards/rejected": -1.692300796508789, "step": 6210 }, { "epoch": 0.72, "learning_rate": 8.649186468453704e-08, "logits/chosen": -3.673053741455078, "logits/rejected": -3.769381284713745, "logps/chosen": -255.92332458496094, "logps/rejected": -278.6451721191406, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": -0.1006774753332138, "rewards/margins": 3.181764841079712, "rewards/rejected": -3.282442331314087, "step": 6211 }, { "epoch": 0.72, "learning_rate": 8.645674821491279e-08, "logits/chosen": -3.628016948699951, "logits/rejected": -3.6785573959350586, "logps/chosen": -179.7212371826172, "logps/rejected": -187.4581756591797, "loss": 0.2539, "rewards/accuracies": 0.875, "rewards/chosen": 0.2319963425397873, "rewards/margins": 2.410912036895752, "rewards/rejected": -2.1789159774780273, "step": 6212 }, { "epoch": 0.72, "learning_rate": 8.642163174528854e-08, "logits/chosen": -3.040497303009033, "logits/rejected": -2.7998287677764893, "logps/chosen": -325.7772216796875, "logps/rejected": -210.19358825683594, "loss": 0.3179, "rewards/accuracies": 0.75, "rewards/chosen": 0.4744609594345093, "rewards/margins": 2.0446083545684814, "rewards/rejected": -1.5701473951339722, "step": 6213 }, { "epoch": 0.72, "learning_rate": 8.638651527566428e-08, "logits/chosen": -3.5246849060058594, "logits/rejected": -3.673649311065674, "logps/chosen": -190.5153350830078, "logps/rejected": -194.64244079589844, "loss": 0.4543, "rewards/accuracies": 0.625, "rewards/chosen": -0.3522980809211731, "rewards/margins": 1.897334098815918, "rewards/rejected": -2.2496321201324463, "step": 6214 }, { "epoch": 0.72, "learning_rate": 8.635139880604002e-08, "logits/chosen": -3.084000587463379, "logits/rejected": -3.049778938293457, "logps/chosen": -175.01821899414062, "logps/rejected": -207.11251831054688, "loss": 0.3954, "rewards/accuracies": 0.875, "rewards/chosen": -0.013409394770860672, "rewards/margins": 2.30769944190979, "rewards/rejected": -2.321108818054199, "step": 6215 }, { "epoch": 0.72, "learning_rate": 8.631628233641577e-08, "logits/chosen": -3.260829448699951, "logits/rejected": -3.3587207794189453, "logps/chosen": -308.1179504394531, "logps/rejected": -208.18878173828125, "loss": 0.3247, "rewards/accuracies": 0.875, "rewards/chosen": -0.2245536744594574, "rewards/margins": 2.6164565086364746, "rewards/rejected": -2.841010093688965, "step": 6216 }, { "epoch": 0.72, "learning_rate": 8.628116586679153e-08, "logits/chosen": -2.3655714988708496, "logits/rejected": -2.9127345085144043, "logps/chosen": -112.45896911621094, "logps/rejected": -214.22207641601562, "loss": 0.3966, "rewards/accuracies": 0.875, "rewards/chosen": -0.3266565799713135, "rewards/margins": 2.041274070739746, "rewards/rejected": -2.3679306507110596, "step": 6217 }, { "epoch": 0.72, "learning_rate": 8.624604939716727e-08, "logits/chosen": -2.460184335708618, "logits/rejected": -2.378178119659424, "logps/chosen": -132.47740173339844, "logps/rejected": -231.49209594726562, "loss": 0.3952, "rewards/accuracies": 0.625, "rewards/chosen": -0.3082536458969116, "rewards/margins": 2.820432186126709, "rewards/rejected": -3.128685712814331, "step": 6218 }, { "epoch": 0.72, "learning_rate": 8.621093292754301e-08, "logits/chosen": -2.659320116043091, "logits/rejected": -2.5503013134002686, "logps/chosen": -162.3297882080078, "logps/rejected": -280.72161865234375, "loss": 0.1946, "rewards/accuracies": 0.875, "rewards/chosen": 0.7030532360076904, "rewards/margins": 3.3834805488586426, "rewards/rejected": -2.680427074432373, "step": 6219 }, { "epoch": 0.72, "learning_rate": 8.617581645791875e-08, "logits/chosen": -3.464320182800293, "logits/rejected": -3.3932673931121826, "logps/chosen": -212.67864990234375, "logps/rejected": -233.3140869140625, "loss": 0.2289, "rewards/accuracies": 0.875, "rewards/chosen": 0.5519707202911377, "rewards/margins": 2.6302335262298584, "rewards/rejected": -2.0782630443573, "step": 6220 }, { "epoch": 0.72, "learning_rate": 8.614069998829451e-08, "logits/chosen": -3.3898746967315674, "logits/rejected": -3.865929365158081, "logps/chosen": -79.1580810546875, "logps/rejected": -156.52774047851562, "loss": 0.2322, "rewards/accuracies": 0.75, "rewards/chosen": 0.6686815023422241, "rewards/margins": 3.0764858722686768, "rewards/rejected": -2.407804489135742, "step": 6221 }, { "epoch": 0.72, "learning_rate": 8.610558351867025e-08, "logits/chosen": -3.5092577934265137, "logits/rejected": -3.650083303451538, "logps/chosen": -309.0154724121094, "logps/rejected": -272.9697265625, "loss": 0.4819, "rewards/accuracies": 0.875, "rewards/chosen": -0.23302705585956573, "rewards/margins": 0.9344514608383179, "rewards/rejected": -1.1674785614013672, "step": 6222 }, { "epoch": 0.72, "learning_rate": 8.6070467049046e-08, "logits/chosen": -2.97107195854187, "logits/rejected": -2.852827548980713, "logps/chosen": -193.140625, "logps/rejected": -249.558349609375, "loss": 0.6357, "rewards/accuracies": 0.75, "rewards/chosen": -0.23762360215187073, "rewards/margins": 1.0629180669784546, "rewards/rejected": -1.300541639328003, "step": 6223 }, { "epoch": 0.72, "learning_rate": 8.603535057942174e-08, "logits/chosen": -2.468775749206543, "logits/rejected": -2.7047958374023438, "logps/chosen": -182.60696411132812, "logps/rejected": -300.49652099609375, "loss": 0.3337, "rewards/accuracies": 0.875, "rewards/chosen": -0.1447678506374359, "rewards/margins": 2.4765188694000244, "rewards/rejected": -2.621286630630493, "step": 6224 }, { "epoch": 0.72, "learning_rate": 8.60002341097975e-08, "logits/chosen": -2.9897100925445557, "logits/rejected": -3.1734156608581543, "logps/chosen": -371.2322998046875, "logps/rejected": -410.45184326171875, "loss": 0.3245, "rewards/accuracies": 0.875, "rewards/chosen": 0.14682847261428833, "rewards/margins": 1.5548166036605835, "rewards/rejected": -1.4079880714416504, "step": 6225 }, { "epoch": 0.72, "learning_rate": 8.596511764017324e-08, "logits/chosen": -2.342602252960205, "logits/rejected": -2.0993754863739014, "logps/chosen": -198.84552001953125, "logps/rejected": -278.88336181640625, "loss": 0.8826, "rewards/accuracies": 0.625, "rewards/chosen": -0.6174329519271851, "rewards/margins": 1.1297760009765625, "rewards/rejected": -1.747209072113037, "step": 6226 }, { "epoch": 0.72, "learning_rate": 8.593000117054898e-08, "logits/chosen": -2.5627217292785645, "logits/rejected": -2.472756862640381, "logps/chosen": -344.158447265625, "logps/rejected": -376.7000732421875, "loss": 0.329, "rewards/accuracies": 0.875, "rewards/chosen": -0.2511926591396332, "rewards/margins": 2.1316771507263184, "rewards/rejected": -2.3828694820404053, "step": 6227 }, { "epoch": 0.72, "learning_rate": 8.589488470092472e-08, "logits/chosen": -2.4449191093444824, "logits/rejected": -2.8248982429504395, "logps/chosen": -350.2446594238281, "logps/rejected": -337.0544738769531, "loss": 0.2587, "rewards/accuracies": 0.875, "rewards/chosen": 0.3733539879322052, "rewards/margins": 2.280017852783203, "rewards/rejected": -1.9066637754440308, "step": 6228 }, { "epoch": 0.72, "learning_rate": 8.585976823130049e-08, "logits/chosen": -2.95918607711792, "logits/rejected": -2.9900777339935303, "logps/chosen": -155.2025146484375, "logps/rejected": -153.85064697265625, "loss": 0.5351, "rewards/accuracies": 0.75, "rewards/chosen": -0.2898723781108856, "rewards/margins": 0.5406811833381653, "rewards/rejected": -0.8305535316467285, "step": 6229 }, { "epoch": 0.72, "learning_rate": 8.582465176167623e-08, "logits/chosen": -3.568181037902832, "logits/rejected": -3.4104301929473877, "logps/chosen": -270.2089538574219, "logps/rejected": -212.7761993408203, "loss": 0.2276, "rewards/accuracies": 1.0, "rewards/chosen": 0.2646157741546631, "rewards/margins": 2.255439519882202, "rewards/rejected": -1.990823745727539, "step": 6230 }, { "epoch": 0.72, "learning_rate": 8.578953529205197e-08, "logits/chosen": -2.601757526397705, "logits/rejected": -2.986440896987915, "logps/chosen": -356.9673767089844, "logps/rejected": -308.0239562988281, "loss": 0.3803, "rewards/accuracies": 0.75, "rewards/chosen": -0.10056573152542114, "rewards/margins": 1.7811245918273926, "rewards/rejected": -1.881690263748169, "step": 6231 }, { "epoch": 0.72, "learning_rate": 8.575441882242771e-08, "logits/chosen": -2.46207594871521, "logits/rejected": -2.6854052543640137, "logps/chosen": -285.23443603515625, "logps/rejected": -170.55178833007812, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -0.4156138300895691, "rewards/margins": 0.7287877202033997, "rewards/rejected": -1.1444015502929688, "step": 6232 }, { "epoch": 0.72, "learning_rate": 8.571930235280347e-08, "logits/chosen": -2.7972702980041504, "logits/rejected": -2.8190526962280273, "logps/chosen": -220.203125, "logps/rejected": -214.31539916992188, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": 0.015666097402572632, "rewards/margins": 2.0984582901000977, "rewards/rejected": -2.082792043685913, "step": 6233 }, { "epoch": 0.72, "learning_rate": 8.568418588317922e-08, "logits/chosen": -3.277355670928955, "logits/rejected": -3.033801794052124, "logps/chosen": -137.0720977783203, "logps/rejected": -139.5054931640625, "loss": 0.6073, "rewards/accuracies": 0.625, "rewards/chosen": -0.11476925760507584, "rewards/margins": 0.42781922221183777, "rewards/rejected": -0.542588472366333, "step": 6234 }, { "epoch": 0.72, "learning_rate": 8.564906941355496e-08, "logits/chosen": -2.894564151763916, "logits/rejected": -2.8224639892578125, "logps/chosen": -317.6152648925781, "logps/rejected": -224.19076538085938, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -0.41659748554229736, "rewards/margins": 0.7525492310523987, "rewards/rejected": -1.1691466569900513, "step": 6235 }, { "epoch": 0.72, "learning_rate": 8.56139529439307e-08, "logits/chosen": -3.1319923400878906, "logits/rejected": -2.718477487564087, "logps/chosen": -168.4676055908203, "logps/rejected": -177.952880859375, "loss": 0.2083, "rewards/accuracies": 0.875, "rewards/chosen": 0.10882195830345154, "rewards/margins": 2.1111302375793457, "rewards/rejected": -2.0023083686828613, "step": 6236 }, { "epoch": 0.72, "learning_rate": 8.557883647430645e-08, "logits/chosen": -2.865548849105835, "logits/rejected": -2.949894428253174, "logps/chosen": -264.7803039550781, "logps/rejected": -140.4404754638672, "loss": 0.8792, "rewards/accuracies": 0.5, "rewards/chosen": -0.5142704844474792, "rewards/margins": 0.2607874274253845, "rewards/rejected": -0.7750579118728638, "step": 6237 }, { "epoch": 0.72, "learning_rate": 8.554372000468219e-08, "logits/chosen": -2.6610703468322754, "logits/rejected": -2.7421576976776123, "logps/chosen": -293.1398620605469, "logps/rejected": -278.7167663574219, "loss": 0.2158, "rewards/accuracies": 1.0, "rewards/chosen": 0.18916448950767517, "rewards/margins": 2.3520398139953613, "rewards/rejected": -2.1628754138946533, "step": 6238 }, { "epoch": 0.72, "learning_rate": 8.550860353505793e-08, "logits/chosen": -2.848931312561035, "logits/rejected": -2.8345346450805664, "logps/chosen": -306.6490783691406, "logps/rejected": -135.3790283203125, "loss": 0.6323, "rewards/accuracies": 0.75, "rewards/chosen": -0.6681374311447144, "rewards/margins": 0.3910433053970337, "rewards/rejected": -1.059180736541748, "step": 6239 }, { "epoch": 0.72, "learning_rate": 8.547348706543369e-08, "logits/chosen": -3.6734566688537598, "logits/rejected": -3.3634095191955566, "logps/chosen": -133.25161743164062, "logps/rejected": -145.0244903564453, "loss": 0.3959, "rewards/accuracies": 0.875, "rewards/chosen": -0.255374014377594, "rewards/margins": 1.3699357509613037, "rewards/rejected": -1.6253098249435425, "step": 6240 }, { "epoch": 0.72, "learning_rate": 8.543837059580943e-08, "logits/chosen": -3.1436634063720703, "logits/rejected": -3.343234062194824, "logps/chosen": -146.49276733398438, "logps/rejected": -278.5816650390625, "loss": 0.3269, "rewards/accuracies": 0.875, "rewards/chosen": 0.22890326380729675, "rewards/margins": 1.8236750364303589, "rewards/rejected": -1.5947718620300293, "step": 6241 }, { "epoch": 0.72, "learning_rate": 8.540325412618518e-08, "logits/chosen": -3.0953752994537354, "logits/rejected": -3.009274482727051, "logps/chosen": -274.2269287109375, "logps/rejected": -221.4766082763672, "loss": 0.9573, "rewards/accuracies": 0.375, "rewards/chosen": -0.9080827832221985, "rewards/margins": 0.6636852025985718, "rewards/rejected": -1.5717679262161255, "step": 6242 }, { "epoch": 0.72, "learning_rate": 8.536813765656092e-08, "logits/chosen": -2.9422683715820312, "logits/rejected": -2.982396125793457, "logps/chosen": -155.53924560546875, "logps/rejected": -217.11761474609375, "loss": 0.3882, "rewards/accuracies": 0.75, "rewards/chosen": 0.11280552297830582, "rewards/margins": 3.145951271057129, "rewards/rejected": -3.0331454277038574, "step": 6243 }, { "epoch": 0.72, "learning_rate": 8.533302118693666e-08, "logits/chosen": -3.546570062637329, "logits/rejected": -2.983919858932495, "logps/chosen": -242.14044189453125, "logps/rejected": -262.06158447265625, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": -0.21760404109954834, "rewards/margins": 2.1296586990356445, "rewards/rejected": -2.3472626209259033, "step": 6244 }, { "epoch": 0.72, "learning_rate": 8.52979047173124e-08, "logits/chosen": -2.847583293914795, "logits/rejected": -2.9833896160125732, "logps/chosen": -316.98907470703125, "logps/rejected": -331.26763916015625, "loss": 0.2573, "rewards/accuracies": 0.875, "rewards/chosen": 0.10652070492506027, "rewards/margins": 2.216336250305176, "rewards/rejected": -2.1098155975341797, "step": 6245 }, { "epoch": 0.72, "learning_rate": 8.526278824768817e-08, "logits/chosen": -3.1121582984924316, "logits/rejected": -2.956580400466919, "logps/chosen": -402.4575500488281, "logps/rejected": -356.32891845703125, "loss": 0.1775, "rewards/accuracies": 1.0, "rewards/chosen": 0.1179531067609787, "rewards/margins": 2.0528955459594727, "rewards/rejected": -1.9349424839019775, "step": 6246 }, { "epoch": 0.72, "learning_rate": 8.522767177806391e-08, "logits/chosen": -3.5689611434936523, "logits/rejected": -3.6692147254943848, "logps/chosen": -242.9729461669922, "logps/rejected": -307.6684265136719, "loss": 0.8477, "rewards/accuracies": 0.375, "rewards/chosen": -1.5586351156234741, "rewards/margins": 0.29153257608413696, "rewards/rejected": -1.8501675128936768, "step": 6247 }, { "epoch": 0.72, "learning_rate": 8.519255530843965e-08, "logits/chosen": -3.2566511631011963, "logits/rejected": -3.240741729736328, "logps/chosen": -196.38543701171875, "logps/rejected": -269.51617431640625, "loss": 0.6098, "rewards/accuracies": 0.625, "rewards/chosen": -0.011288568377494812, "rewards/margins": 1.4440367221832275, "rewards/rejected": -1.4553251266479492, "step": 6248 }, { "epoch": 0.72, "learning_rate": 8.515743883881539e-08, "logits/chosen": -3.088057518005371, "logits/rejected": -3.1470632553100586, "logps/chosen": -264.2438659667969, "logps/rejected": -245.93338012695312, "loss": 0.1388, "rewards/accuracies": 1.0, "rewards/chosen": 0.646129846572876, "rewards/margins": 2.357839822769165, "rewards/rejected": -1.7117100954055786, "step": 6249 }, { "epoch": 0.72, "learning_rate": 8.512232236919116e-08, "logits/chosen": -2.616626262664795, "logits/rejected": -2.740483522415161, "logps/chosen": -254.18821716308594, "logps/rejected": -361.4554443359375, "loss": 0.3566, "rewards/accuracies": 0.75, "rewards/chosen": -0.9553130269050598, "rewards/margins": 2.4873809814453125, "rewards/rejected": -3.4426939487457275, "step": 6250 }, { "epoch": 0.72, "learning_rate": 8.50872058995669e-08, "logits/chosen": -2.6419901847839355, "logits/rejected": -2.5201663970947266, "logps/chosen": -213.8834228515625, "logps/rejected": -210.63494873046875, "loss": 0.2782, "rewards/accuracies": 1.0, "rewards/chosen": 0.10980918258428574, "rewards/margins": 1.4906436204910278, "rewards/rejected": -1.3808345794677734, "step": 6251 }, { "epoch": 0.72, "learning_rate": 8.505208942994264e-08, "logits/chosen": -3.526102066040039, "logits/rejected": -3.274887800216675, "logps/chosen": -200.23143005371094, "logps/rejected": -156.93850708007812, "loss": 0.3908, "rewards/accuracies": 0.75, "rewards/chosen": 0.04509430378675461, "rewards/margins": 2.0682435035705566, "rewards/rejected": -2.023149251937866, "step": 6252 }, { "epoch": 0.72, "learning_rate": 8.501697296031838e-08, "logits/chosen": -2.593017101287842, "logits/rejected": -2.6330161094665527, "logps/chosen": -111.45880889892578, "logps/rejected": -130.27725219726562, "loss": 0.362, "rewards/accuracies": 0.75, "rewards/chosen": -0.1506037414073944, "rewards/margins": 1.2883274555206299, "rewards/rejected": -1.4389312267303467, "step": 6253 }, { "epoch": 0.72, "learning_rate": 8.498185649069413e-08, "logits/chosen": -2.60383939743042, "logits/rejected": -2.5834503173828125, "logps/chosen": -183.40931701660156, "logps/rejected": -337.6494140625, "loss": 0.2824, "rewards/accuracies": 0.75, "rewards/chosen": -0.4244502782821655, "rewards/margins": 3.612639904022217, "rewards/rejected": -4.037090301513672, "step": 6254 }, { "epoch": 0.72, "learning_rate": 8.494674002106987e-08, "logits/chosen": -3.1588120460510254, "logits/rejected": -2.947680950164795, "logps/chosen": -339.39874267578125, "logps/rejected": -296.22723388671875, "loss": 0.4758, "rewards/accuracies": 0.625, "rewards/chosen": -0.31119251251220703, "rewards/margins": 1.5254976749420166, "rewards/rejected": -1.8366901874542236, "step": 6255 }, { "epoch": 0.72, "learning_rate": 8.491162355144562e-08, "logits/chosen": -2.847172737121582, "logits/rejected": -2.747995138168335, "logps/chosen": -185.7191162109375, "logps/rejected": -236.264404296875, "loss": 0.2347, "rewards/accuracies": 1.0, "rewards/chosen": -0.026789769530296326, "rewards/margins": 1.7865556478500366, "rewards/rejected": -1.8133454322814941, "step": 6256 }, { "epoch": 0.72, "learning_rate": 8.487650708182137e-08, "logits/chosen": -3.273263692855835, "logits/rejected": -3.5938992500305176, "logps/chosen": -218.46710205078125, "logps/rejected": -206.01931762695312, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": 0.45577287673950195, "rewards/margins": 3.357509136199951, "rewards/rejected": -2.901736259460449, "step": 6257 }, { "epoch": 0.72, "learning_rate": 8.484139061219712e-08, "logits/chosen": -2.8537702560424805, "logits/rejected": -2.9218153953552246, "logps/chosen": -300.13616943359375, "logps/rejected": -339.1453857421875, "loss": 0.2422, "rewards/accuracies": 0.875, "rewards/chosen": -0.09189091622829437, "rewards/margins": 2.1648545265197754, "rewards/rejected": -2.2567451000213623, "step": 6258 }, { "epoch": 0.72, "learning_rate": 8.480627414257286e-08, "logits/chosen": -3.386720657348633, "logits/rejected": -3.640533685684204, "logps/chosen": -138.34902954101562, "logps/rejected": -137.23846435546875, "loss": 0.3135, "rewards/accuracies": 0.875, "rewards/chosen": -0.26158368587493896, "rewards/margins": 1.739933967590332, "rewards/rejected": -2.0015177726745605, "step": 6259 }, { "epoch": 0.72, "learning_rate": 8.47711576729486e-08, "logits/chosen": -3.0016279220581055, "logits/rejected": -3.2234678268432617, "logps/chosen": -208.80181884765625, "logps/rejected": -231.61959838867188, "loss": 0.328, "rewards/accuracies": 0.875, "rewards/chosen": -0.20624268054962158, "rewards/margins": 1.494094729423523, "rewards/rejected": -1.700337290763855, "step": 6260 }, { "epoch": 0.72, "learning_rate": 8.473604120332434e-08, "logits/chosen": -3.2461695671081543, "logits/rejected": -3.3387646675109863, "logps/chosen": -218.27008056640625, "logps/rejected": -276.112548828125, "loss": 0.3873, "rewards/accuracies": 0.75, "rewards/chosen": -0.16898766160011292, "rewards/margins": 3.4332799911499023, "rewards/rejected": -3.6022677421569824, "step": 6261 }, { "epoch": 0.72, "learning_rate": 8.470092473370011e-08, "logits/chosen": -2.8552987575531006, "logits/rejected": -3.047182321548462, "logps/chosen": -136.90223693847656, "logps/rejected": -314.7531433105469, "loss": 0.4548, "rewards/accuracies": 0.625, "rewards/chosen": 0.03014979138970375, "rewards/margins": 1.6778748035430908, "rewards/rejected": -1.6477251052856445, "step": 6262 }, { "epoch": 0.72, "learning_rate": 8.466580826407585e-08, "logits/chosen": -2.9430160522460938, "logits/rejected": -2.8224093914031982, "logps/chosen": -379.164794921875, "logps/rejected": -316.68121337890625, "loss": 0.1365, "rewards/accuracies": 0.875, "rewards/chosen": 0.828264057636261, "rewards/margins": 2.887794017791748, "rewards/rejected": -2.0595297813415527, "step": 6263 }, { "epoch": 0.72, "learning_rate": 8.463069179445159e-08, "logits/chosen": -2.982023000717163, "logits/rejected": -2.8268449306488037, "logps/chosen": -140.92398071289062, "logps/rejected": -224.25408935546875, "loss": 0.5799, "rewards/accuracies": 0.75, "rewards/chosen": -0.1352217197418213, "rewards/margins": 0.8748186826705933, "rewards/rejected": -1.0100404024124146, "step": 6264 }, { "epoch": 0.72, "learning_rate": 8.459557532482733e-08, "logits/chosen": -3.3811850547790527, "logits/rejected": -3.5541558265686035, "logps/chosen": -175.58084106445312, "logps/rejected": -174.06512451171875, "loss": 0.3416, "rewards/accuracies": 1.0, "rewards/chosen": 0.06171835958957672, "rewards/margins": 1.8693888187408447, "rewards/rejected": -1.8076703548431396, "step": 6265 }, { "epoch": 0.72, "learning_rate": 8.456045885520309e-08, "logits/chosen": -3.5368447303771973, "logits/rejected": -3.564699649810791, "logps/chosen": -347.0350036621094, "logps/rejected": -306.87115478515625, "loss": 0.8387, "rewards/accuracies": 0.75, "rewards/chosen": -0.49883148074150085, "rewards/margins": 1.1292953491210938, "rewards/rejected": -1.628126621246338, "step": 6266 }, { "epoch": 0.72, "learning_rate": 8.452534238557884e-08, "logits/chosen": -3.1877639293670654, "logits/rejected": -3.0579466819763184, "logps/chosen": -249.658935546875, "logps/rejected": -257.7124938964844, "loss": 0.4991, "rewards/accuracies": 0.625, "rewards/chosen": -0.22231227159500122, "rewards/margins": 0.8941972851753235, "rewards/rejected": -1.1165095567703247, "step": 6267 }, { "epoch": 0.72, "learning_rate": 8.449022591595458e-08, "logits/chosen": -3.3912158012390137, "logits/rejected": -3.7296178340911865, "logps/chosen": -270.26458740234375, "logps/rejected": -351.25482177734375, "loss": 0.7967, "rewards/accuracies": 0.75, "rewards/chosen": -0.731343150138855, "rewards/margins": 0.803435206413269, "rewards/rejected": -1.534778356552124, "step": 6268 }, { "epoch": 0.72, "learning_rate": 8.445510944633032e-08, "logits/chosen": -3.0934033393859863, "logits/rejected": -3.5140411853790283, "logps/chosen": -162.88919067382812, "logps/rejected": -191.71502685546875, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": -0.41422122716903687, "rewards/margins": 1.5621792078018188, "rewards/rejected": -1.976400375366211, "step": 6269 }, { "epoch": 0.72, "learning_rate": 8.441999297670608e-08, "logits/chosen": -2.909179925918579, "logits/rejected": -2.954573631286621, "logps/chosen": -185.3433837890625, "logps/rejected": -208.2130584716797, "loss": 0.4458, "rewards/accuracies": 0.875, "rewards/chosen": 0.16982579231262207, "rewards/margins": 1.4464986324310303, "rewards/rejected": -1.2766729593276978, "step": 6270 }, { "epoch": 0.72, "learning_rate": 8.438487650708182e-08, "logits/chosen": -3.0328311920166016, "logits/rejected": -2.981339454650879, "logps/chosen": -162.599609375, "logps/rejected": -219.60101318359375, "loss": 0.2512, "rewards/accuracies": 0.875, "rewards/chosen": -0.17381522059440613, "rewards/margins": 1.8205586671829224, "rewards/rejected": -1.9943739175796509, "step": 6271 }, { "epoch": 0.72, "learning_rate": 8.434976003745756e-08, "logits/chosen": -2.5618948936462402, "logits/rejected": -2.5975489616394043, "logps/chosen": -98.26287841796875, "logps/rejected": -192.7917022705078, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -0.005255177617073059, "rewards/margins": 1.5950639247894287, "rewards/rejected": -1.6003190279006958, "step": 6272 }, { "epoch": 0.72, "learning_rate": 8.43146435678333e-08, "logits/chosen": -3.4478249549865723, "logits/rejected": -3.3231570720672607, "logps/chosen": -260.5186767578125, "logps/rejected": -303.4814453125, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": 0.37039822340011597, "rewards/margins": 2.557544231414795, "rewards/rejected": -2.1871461868286133, "step": 6273 }, { "epoch": 0.72, "learning_rate": 8.427952709820907e-08, "logits/chosen": -3.2165169715881348, "logits/rejected": -3.0515427589416504, "logps/chosen": -150.4286651611328, "logps/rejected": -163.34841918945312, "loss": 0.3795, "rewards/accuracies": 0.875, "rewards/chosen": 0.3325687050819397, "rewards/margins": 1.626227617263794, "rewards/rejected": -1.293658971786499, "step": 6274 }, { "epoch": 0.72, "learning_rate": 8.42444106285848e-08, "logits/chosen": -2.867514133453369, "logits/rejected": -2.816272735595703, "logps/chosen": -374.82489013671875, "logps/rejected": -194.81591796875, "loss": 0.5881, "rewards/accuracies": 0.625, "rewards/chosen": -0.2747151851654053, "rewards/margins": 0.7732227444648743, "rewards/rejected": -1.0479379892349243, "step": 6275 }, { "epoch": 0.72, "learning_rate": 8.420929415896055e-08, "logits/chosen": -3.601177215576172, "logits/rejected": -3.122390031814575, "logps/chosen": -250.98818969726562, "logps/rejected": -221.5794677734375, "loss": 0.313, "rewards/accuracies": 0.875, "rewards/chosen": 0.4452561140060425, "rewards/margins": 1.8691513538360596, "rewards/rejected": -1.4238951206207275, "step": 6276 }, { "epoch": 0.72, "learning_rate": 8.417417768933629e-08, "logits/chosen": -3.3579907417297363, "logits/rejected": -3.6270580291748047, "logps/chosen": -190.69354248046875, "logps/rejected": -322.753173828125, "loss": 0.3783, "rewards/accuracies": 0.875, "rewards/chosen": 0.39213645458221436, "rewards/margins": 1.87322199344635, "rewards/rejected": -1.4810855388641357, "step": 6277 }, { "epoch": 0.72, "learning_rate": 8.413906121971205e-08, "logits/chosen": -3.984138011932373, "logits/rejected": -3.3709700107574463, "logps/chosen": -241.1252899169922, "logps/rejected": -172.7447052001953, "loss": 0.4307, "rewards/accuracies": 0.875, "rewards/chosen": 0.15482193231582642, "rewards/margins": 1.2046866416931152, "rewards/rejected": -1.0498647689819336, "step": 6278 }, { "epoch": 0.72, "learning_rate": 8.41039447500878e-08, "logits/chosen": -3.11362624168396, "logits/rejected": -2.725102424621582, "logps/chosen": -443.1846618652344, "logps/rejected": -326.4090576171875, "loss": 0.563, "rewards/accuracies": 0.875, "rewards/chosen": -0.9030854105949402, "rewards/margins": 1.71173095703125, "rewards/rejected": -2.614816188812256, "step": 6279 }, { "epoch": 0.72, "learning_rate": 8.406882828046354e-08, "logits/chosen": -2.859363079071045, "logits/rejected": -2.7622170448303223, "logps/chosen": -267.2108459472656, "logps/rejected": -248.2975311279297, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": -0.24156567454338074, "rewards/margins": 3.2222490310668945, "rewards/rejected": -3.4638147354125977, "step": 6280 }, { "epoch": 0.72, "learning_rate": 8.403371181083928e-08, "logits/chosen": -3.3904356956481934, "logits/rejected": -3.4035303592681885, "logps/chosen": -397.56695556640625, "logps/rejected": -380.6833190917969, "loss": 0.159, "rewards/accuracies": 1.0, "rewards/chosen": -0.009605005383491516, "rewards/margins": 2.7309536933898926, "rewards/rejected": -2.740558624267578, "step": 6281 }, { "epoch": 0.72, "learning_rate": 8.399859534121503e-08, "logits/chosen": -3.179352283477783, "logits/rejected": -3.0661356449127197, "logps/chosen": -291.62567138671875, "logps/rejected": -203.4659423828125, "loss": 0.423, "rewards/accuracies": 0.625, "rewards/chosen": 0.0720459520816803, "rewards/margins": 1.2864990234375, "rewards/rejected": -1.2144529819488525, "step": 6282 }, { "epoch": 0.72, "learning_rate": 8.396347887159077e-08, "logits/chosen": -2.5746030807495117, "logits/rejected": -2.355295181274414, "logps/chosen": -358.5271301269531, "logps/rejected": -389.95880126953125, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": -0.4041932225227356, "rewards/margins": 0.38582074642181396, "rewards/rejected": -0.7900140285491943, "step": 6283 }, { "epoch": 0.72, "learning_rate": 8.392836240196652e-08, "logits/chosen": -2.477261781692505, "logits/rejected": -2.4821016788482666, "logps/chosen": -200.21766662597656, "logps/rejected": -156.7978973388672, "loss": 0.5477, "rewards/accuracies": 0.75, "rewards/chosen": -0.5151647329330444, "rewards/margins": 0.8082389235496521, "rewards/rejected": -1.3234035968780518, "step": 6284 }, { "epoch": 0.72, "learning_rate": 8.389324593234227e-08, "logits/chosen": -2.5965070724487305, "logits/rejected": -2.547884702682495, "logps/chosen": -348.1279296875, "logps/rejected": -263.2189025878906, "loss": 0.3385, "rewards/accuracies": 0.875, "rewards/chosen": 0.09889373183250427, "rewards/margins": 1.7601655721664429, "rewards/rejected": -1.6612718105316162, "step": 6285 }, { "epoch": 0.72, "learning_rate": 8.385812946271802e-08, "logits/chosen": -2.3406271934509277, "logits/rejected": -2.4149844646453857, "logps/chosen": -318.87615966796875, "logps/rejected": -240.38192749023438, "loss": 0.6432, "rewards/accuracies": 0.5, "rewards/chosen": -0.5517070889472961, "rewards/margins": 1.0659584999084473, "rewards/rejected": -1.6176655292510986, "step": 6286 }, { "epoch": 0.72, "learning_rate": 8.382301299309376e-08, "logits/chosen": -3.260585308074951, "logits/rejected": -3.2350287437438965, "logps/chosen": -372.6209411621094, "logps/rejected": -251.24610900878906, "loss": 0.3409, "rewards/accuracies": 0.875, "rewards/chosen": 0.01149781048297882, "rewards/margins": 1.3019291162490845, "rewards/rejected": -1.290431261062622, "step": 6287 }, { "epoch": 0.72, "learning_rate": 8.37878965234695e-08, "logits/chosen": -2.6980996131896973, "logits/rejected": -2.7562761306762695, "logps/chosen": -205.42996215820312, "logps/rejected": -253.03916931152344, "loss": 0.1743, "rewards/accuracies": 1.0, "rewards/chosen": 0.14497989416122437, "rewards/margins": 2.63859224319458, "rewards/rejected": -2.493612289428711, "step": 6288 }, { "epoch": 0.72, "learning_rate": 8.375278005384524e-08, "logits/chosen": -3.0154929161071777, "logits/rejected": -3.160339117050171, "logps/chosen": -510.25537109375, "logps/rejected": -162.481201171875, "loss": 0.8945, "rewards/accuracies": 0.5, "rewards/chosen": -0.7747213840484619, "rewards/margins": 0.2632142901420593, "rewards/rejected": -1.037935733795166, "step": 6289 }, { "epoch": 0.73, "learning_rate": 8.371766358422098e-08, "logits/chosen": -3.520028591156006, "logits/rejected": -3.06062650680542, "logps/chosen": -366.04168701171875, "logps/rejected": -228.6161651611328, "loss": 0.3049, "rewards/accuracies": 0.75, "rewards/chosen": -0.26746052503585815, "rewards/margins": 2.2751567363739014, "rewards/rejected": -2.5426173210144043, "step": 6290 }, { "epoch": 0.73, "learning_rate": 8.368254711459675e-08, "logits/chosen": -3.4397199153900146, "logits/rejected": -3.4305295944213867, "logps/chosen": -320.2738342285156, "logps/rejected": -294.5267639160156, "loss": 0.2737, "rewards/accuracies": 0.875, "rewards/chosen": -0.1618047058582306, "rewards/margins": 1.9696441888809204, "rewards/rejected": -2.131448984146118, "step": 6291 }, { "epoch": 0.73, "learning_rate": 8.364743064497249e-08, "logits/chosen": -3.214134454727173, "logits/rejected": -3.80350661277771, "logps/chosen": -152.89271545410156, "logps/rejected": -312.3417053222656, "loss": 0.3712, "rewards/accuracies": 0.875, "rewards/chosen": -1.018940806388855, "rewards/margins": 1.8186287879943848, "rewards/rejected": -2.83756947517395, "step": 6292 }, { "epoch": 0.73, "learning_rate": 8.361231417534823e-08, "logits/chosen": -2.613271713256836, "logits/rejected": -2.9848105907440186, "logps/chosen": -121.55078887939453, "logps/rejected": -217.26815795898438, "loss": 0.7162, "rewards/accuracies": 0.5, "rewards/chosen": 0.01217576302587986, "rewards/margins": 0.4635871648788452, "rewards/rejected": -0.45141148567199707, "step": 6293 }, { "epoch": 0.73, "learning_rate": 8.357719770572397e-08, "logits/chosen": -2.84100341796875, "logits/rejected": -2.6846184730529785, "logps/chosen": -156.8587188720703, "logps/rejected": -273.0836181640625, "loss": 0.3239, "rewards/accuracies": 1.0, "rewards/chosen": -0.29572510719299316, "rewards/margins": 1.8496615886688232, "rewards/rejected": -2.1453864574432373, "step": 6294 }, { "epoch": 0.73, "learning_rate": 8.354208123609974e-08, "logits/chosen": -3.0451793670654297, "logits/rejected": -3.0388383865356445, "logps/chosen": -148.17198181152344, "logps/rejected": -282.2555847167969, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.24630150198936462, "rewards/margins": 1.7564725875854492, "rewards/rejected": -2.002774238586426, "step": 6295 }, { "epoch": 0.73, "learning_rate": 8.350696476647548e-08, "logits/chosen": -2.857025623321533, "logits/rejected": -2.7571797370910645, "logps/chosen": -296.7508544921875, "logps/rejected": -302.4924011230469, "loss": 0.6116, "rewards/accuracies": 0.625, "rewards/chosen": -0.731676459312439, "rewards/margins": 1.7516429424285889, "rewards/rejected": -2.4833192825317383, "step": 6296 }, { "epoch": 0.73, "learning_rate": 8.347184829685122e-08, "logits/chosen": -2.7498693466186523, "logits/rejected": -2.867306709289551, "logps/chosen": -169.7121124267578, "logps/rejected": -267.9374084472656, "loss": 0.2856, "rewards/accuracies": 0.75, "rewards/chosen": -0.1465682089328766, "rewards/margins": 1.8432800769805908, "rewards/rejected": -1.9898483753204346, "step": 6297 }, { "epoch": 0.73, "learning_rate": 8.343673182722696e-08, "logits/chosen": -2.8049240112304688, "logits/rejected": -2.8365559577941895, "logps/chosen": -251.622802734375, "logps/rejected": -372.61907958984375, "loss": 0.4332, "rewards/accuracies": 0.75, "rewards/chosen": -0.36610913276672363, "rewards/margins": 1.4710171222686768, "rewards/rejected": -1.8371262550354004, "step": 6298 }, { "epoch": 0.73, "learning_rate": 8.340161535760271e-08, "logits/chosen": -3.7430360317230225, "logits/rejected": -3.947934150695801, "logps/chosen": -100.45657348632812, "logps/rejected": -196.48182678222656, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -0.09701310098171234, "rewards/margins": 1.3893128633499146, "rewards/rejected": -1.486325979232788, "step": 6299 }, { "epoch": 0.73, "learning_rate": 8.336649888797845e-08, "logits/chosen": -3.241220235824585, "logits/rejected": -3.17036771774292, "logps/chosen": -201.83982849121094, "logps/rejected": -226.7751922607422, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": 0.4980280101299286, "rewards/margins": 2.3650870323181152, "rewards/rejected": -1.8670592308044434, "step": 6300 }, { "epoch": 0.73, "learning_rate": 8.333138241835421e-08, "logits/chosen": -2.6817245483398438, "logits/rejected": -2.552454710006714, "logps/chosen": -243.77114868164062, "logps/rejected": -377.82427978515625, "loss": 0.4219, "rewards/accuracies": 0.625, "rewards/chosen": 0.6259527206420898, "rewards/margins": 1.9098961353302002, "rewards/rejected": -1.2839434146881104, "step": 6301 }, { "epoch": 0.73, "learning_rate": 8.329626594872995e-08, "logits/chosen": -3.139981746673584, "logits/rejected": -3.062119245529175, "logps/chosen": -150.7879180908203, "logps/rejected": -217.0288848876953, "loss": 0.4946, "rewards/accuracies": 0.875, "rewards/chosen": -0.09455959498882294, "rewards/margins": 1.3896270990371704, "rewards/rejected": -1.4841866493225098, "step": 6302 }, { "epoch": 0.73, "learning_rate": 8.32611494791057e-08, "logits/chosen": -2.615550994873047, "logits/rejected": -2.8647384643554688, "logps/chosen": -408.47674560546875, "logps/rejected": -331.8103332519531, "loss": 0.4335, "rewards/accuracies": 0.75, "rewards/chosen": 0.6378285884857178, "rewards/margins": 3.4544098377227783, "rewards/rejected": -2.8165812492370605, "step": 6303 }, { "epoch": 0.73, "learning_rate": 8.322603300948144e-08, "logits/chosen": -2.3938000202178955, "logits/rejected": -2.8679771423339844, "logps/chosen": -379.28179931640625, "logps/rejected": -276.7356262207031, "loss": 0.447, "rewards/accuracies": 0.75, "rewards/chosen": -0.1512470543384552, "rewards/margins": 1.646498680114746, "rewards/rejected": -1.797745943069458, "step": 6304 }, { "epoch": 0.73, "learning_rate": 8.319091653985718e-08, "logits/chosen": -4.037156105041504, "logits/rejected": -3.8509926795959473, "logps/chosen": -399.1663818359375, "logps/rejected": -300.700927734375, "loss": 0.2947, "rewards/accuracies": 0.75, "rewards/chosen": 0.1439438760280609, "rewards/margins": 2.057008743286133, "rewards/rejected": -1.9130650758743286, "step": 6305 }, { "epoch": 0.73, "learning_rate": 8.315580007023292e-08, "logits/chosen": -3.091869354248047, "logits/rejected": -3.1040849685668945, "logps/chosen": -341.19952392578125, "logps/rejected": -230.09078979492188, "loss": 0.4297, "rewards/accuracies": 0.875, "rewards/chosen": 0.04455310106277466, "rewards/margins": 1.0075570344924927, "rewards/rejected": -0.963003933429718, "step": 6306 }, { "epoch": 0.73, "learning_rate": 8.312068360060869e-08, "logits/chosen": -3.091261625289917, "logits/rejected": -3.0261282920837402, "logps/chosen": -275.23687744140625, "logps/rejected": -283.5963134765625, "loss": 0.3066, "rewards/accuracies": 0.875, "rewards/chosen": 0.08226747810840607, "rewards/margins": 2.639946460723877, "rewards/rejected": -2.5576791763305664, "step": 6307 }, { "epoch": 0.73, "learning_rate": 8.308556713098443e-08, "logits/chosen": -2.7452588081359863, "logits/rejected": -2.919250011444092, "logps/chosen": -174.54354858398438, "logps/rejected": -182.00390625, "loss": 0.5934, "rewards/accuracies": 0.625, "rewards/chosen": 0.23117992281913757, "rewards/margins": 0.5466278195381165, "rewards/rejected": -0.3154478669166565, "step": 6308 }, { "epoch": 0.73, "learning_rate": 8.305045066136017e-08, "logits/chosen": -2.735462188720703, "logits/rejected": -2.7080869674682617, "logps/chosen": -144.44998168945312, "logps/rejected": -238.22006225585938, "loss": 0.6183, "rewards/accuracies": 0.625, "rewards/chosen": -0.571324348449707, "rewards/margins": 0.4916572570800781, "rewards/rejected": -1.0629817247390747, "step": 6309 }, { "epoch": 0.73, "learning_rate": 8.301533419173591e-08, "logits/chosen": -2.7152786254882812, "logits/rejected": -2.510056972503662, "logps/chosen": -174.60498046875, "logps/rejected": -187.27255249023438, "loss": 0.3028, "rewards/accuracies": 0.75, "rewards/chosen": -0.3051367402076721, "rewards/margins": 2.016900062561035, "rewards/rejected": -2.3220367431640625, "step": 6310 }, { "epoch": 0.73, "learning_rate": 8.298021772211167e-08, "logits/chosen": -3.484388828277588, "logits/rejected": -3.6179251670837402, "logps/chosen": -182.06463623046875, "logps/rejected": -223.07818603515625, "loss": 0.2026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5659706592559814, "rewards/margins": 2.03658390045166, "rewards/rejected": -1.4706132411956787, "step": 6311 }, { "epoch": 0.73, "learning_rate": 8.294510125248742e-08, "logits/chosen": -2.9861137866973877, "logits/rejected": -2.924213171005249, "logps/chosen": -224.51083374023438, "logps/rejected": -266.17486572265625, "loss": 0.2769, "rewards/accuracies": 0.875, "rewards/chosen": -0.7859976291656494, "rewards/margins": 2.972860336303711, "rewards/rejected": -3.7588582038879395, "step": 6312 }, { "epoch": 0.73, "learning_rate": 8.290998478286316e-08, "logits/chosen": -3.5376572608947754, "logits/rejected": -3.3463213443756104, "logps/chosen": -206.10394287109375, "logps/rejected": -363.5721435546875, "loss": 0.4037, "rewards/accuracies": 0.75, "rewards/chosen": 0.26267892122268677, "rewards/margins": 1.7122085094451904, "rewards/rejected": -1.4495295286178589, "step": 6313 }, { "epoch": 0.73, "learning_rate": 8.28748683132389e-08, "logits/chosen": -3.677359104156494, "logits/rejected": -3.882296085357666, "logps/chosen": -172.92117309570312, "logps/rejected": -283.5487060546875, "loss": 0.4488, "rewards/accuracies": 0.75, "rewards/chosen": -0.16247737407684326, "rewards/margins": 2.753994941711426, "rewards/rejected": -2.9164721965789795, "step": 6314 }, { "epoch": 0.73, "learning_rate": 8.283975184361466e-08, "logits/chosen": -3.6444387435913086, "logits/rejected": -3.5155320167541504, "logps/chosen": -242.06478881835938, "logps/rejected": -289.468994140625, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": -0.29052865505218506, "rewards/margins": 3.2564077377319336, "rewards/rejected": -3.546936511993408, "step": 6315 }, { "epoch": 0.73, "learning_rate": 8.28046353739904e-08, "logits/chosen": -3.368485927581787, "logits/rejected": -3.2354683876037598, "logps/chosen": -323.9401550292969, "logps/rejected": -262.0630798339844, "loss": 0.4261, "rewards/accuracies": 0.75, "rewards/chosen": -0.1523815095424652, "rewards/margins": 1.9821691513061523, "rewards/rejected": -2.1345505714416504, "step": 6316 }, { "epoch": 0.73, "learning_rate": 8.276951890436614e-08, "logits/chosen": -3.4895639419555664, "logits/rejected": -2.777543306350708, "logps/chosen": -292.3086242675781, "logps/rejected": -260.3674621582031, "loss": 0.2573, "rewards/accuracies": 0.875, "rewards/chosen": 0.06809408962726593, "rewards/margins": 2.3456273078918457, "rewards/rejected": -2.2775332927703857, "step": 6317 }, { "epoch": 0.73, "learning_rate": 8.273440243474189e-08, "logits/chosen": -2.4468207359313965, "logits/rejected": -2.7551393508911133, "logps/chosen": -235.4058837890625, "logps/rejected": -279.052490234375, "loss": 0.2761, "rewards/accuracies": 0.875, "rewards/chosen": -0.08247226476669312, "rewards/margins": 2.1060848236083984, "rewards/rejected": -2.1885571479797363, "step": 6318 }, { "epoch": 0.73, "learning_rate": 8.269928596511764e-08, "logits/chosen": -2.7532811164855957, "logits/rejected": -2.7586770057678223, "logps/chosen": -221.7967071533203, "logps/rejected": -221.61431884765625, "loss": 0.2395, "rewards/accuracies": 1.0, "rewards/chosen": -0.11650492250919342, "rewards/margins": 1.88939368724823, "rewards/rejected": -2.0058987140655518, "step": 6319 }, { "epoch": 0.73, "learning_rate": 8.266416949549339e-08, "logits/chosen": -2.5041565895080566, "logits/rejected": -2.70467209815979, "logps/chosen": -414.3048095703125, "logps/rejected": -388.85211181640625, "loss": 0.2927, "rewards/accuracies": 1.0, "rewards/chosen": 0.2787785828113556, "rewards/margins": 2.1656136512756348, "rewards/rejected": -1.886834979057312, "step": 6320 }, { "epoch": 0.73, "learning_rate": 8.262905302586913e-08, "logits/chosen": -3.1583809852600098, "logits/rejected": -3.023759365081787, "logps/chosen": -444.9303894042969, "logps/rejected": -266.6452331542969, "loss": 0.5707, "rewards/accuracies": 0.75, "rewards/chosen": -0.24766342341899872, "rewards/margins": 1.3770396709442139, "rewards/rejected": -1.624703049659729, "step": 6321 }, { "epoch": 0.73, "learning_rate": 8.259393655624487e-08, "logits/chosen": -3.009894371032715, "logits/rejected": -3.143695116043091, "logps/chosen": -466.63433837890625, "logps/rejected": -356.092529296875, "loss": 0.4783, "rewards/accuracies": 0.75, "rewards/chosen": 0.027475692331790924, "rewards/margins": 1.6950523853302002, "rewards/rejected": -1.667576789855957, "step": 6322 }, { "epoch": 0.73, "learning_rate": 8.255882008662063e-08, "logits/chosen": -3.135611057281494, "logits/rejected": -2.9321141242980957, "logps/chosen": -378.253173828125, "logps/rejected": -378.6016845703125, "loss": 0.2113, "rewards/accuracies": 0.875, "rewards/chosen": 0.06771749258041382, "rewards/margins": 2.4001057147979736, "rewards/rejected": -2.332387924194336, "step": 6323 }, { "epoch": 0.73, "learning_rate": 8.252370361699637e-08, "logits/chosen": -2.955157995223999, "logits/rejected": -3.264991521835327, "logps/chosen": -292.7845153808594, "logps/rejected": -361.56597900390625, "loss": 0.3371, "rewards/accuracies": 0.75, "rewards/chosen": -0.12923410534858704, "rewards/margins": 2.9329910278320312, "rewards/rejected": -3.062225341796875, "step": 6324 }, { "epoch": 0.73, "learning_rate": 8.248858714737211e-08, "logits/chosen": -2.7200727462768555, "logits/rejected": -2.8574166297912598, "logps/chosen": -101.61862182617188, "logps/rejected": -175.15118408203125, "loss": 0.3844, "rewards/accuracies": 0.75, "rewards/chosen": -0.5044230818748474, "rewards/margins": 2.0023608207702637, "rewards/rejected": -2.506783962249756, "step": 6325 }, { "epoch": 0.73, "learning_rate": 8.245347067774786e-08, "logits/chosen": -3.4870667457580566, "logits/rejected": -3.700751304626465, "logps/chosen": -288.1723327636719, "logps/rejected": -211.3855743408203, "loss": 0.3466, "rewards/accuracies": 0.875, "rewards/chosen": 0.1870412975549698, "rewards/margins": 1.7043567895889282, "rewards/rejected": -1.51731538772583, "step": 6326 }, { "epoch": 0.73, "learning_rate": 8.241835420812361e-08, "logits/chosen": -3.5563127994537354, "logits/rejected": -3.6597371101379395, "logps/chosen": -359.447021484375, "logps/rejected": -316.94757080078125, "loss": 0.4296, "rewards/accuracies": 0.625, "rewards/chosen": -0.19766739010810852, "rewards/margins": 1.4659759998321533, "rewards/rejected": -1.663643479347229, "step": 6327 }, { "epoch": 0.73, "learning_rate": 8.238323773849935e-08, "logits/chosen": -3.213351249694824, "logits/rejected": -2.8402633666992188, "logps/chosen": -240.65390014648438, "logps/rejected": -363.2618103027344, "loss": 0.2001, "rewards/accuracies": 0.875, "rewards/chosen": 0.2318764626979828, "rewards/margins": 2.739772319793701, "rewards/rejected": -2.5078957080841064, "step": 6328 }, { "epoch": 0.73, "learning_rate": 8.23481212688751e-08, "logits/chosen": -3.1121230125427246, "logits/rejected": -3.19346022605896, "logps/chosen": -306.8558349609375, "logps/rejected": -318.6468200683594, "loss": 0.3308, "rewards/accuracies": 0.75, "rewards/chosen": -0.04743701219558716, "rewards/margins": 2.361138343811035, "rewards/rejected": -2.4085752964019775, "step": 6329 }, { "epoch": 0.73, "learning_rate": 8.231300479925084e-08, "logits/chosen": -3.440436840057373, "logits/rejected": -3.5107665061950684, "logps/chosen": -227.20660400390625, "logps/rejected": -226.88888549804688, "loss": 0.2843, "rewards/accuracies": 0.75, "rewards/chosen": 0.24231500923633575, "rewards/margins": 1.882246732711792, "rewards/rejected": -1.6399317979812622, "step": 6330 }, { "epoch": 0.73, "learning_rate": 8.22778883296266e-08, "logits/chosen": -3.0982139110565186, "logits/rejected": -3.0002055168151855, "logps/chosen": -198.01141357421875, "logps/rejected": -179.04078674316406, "loss": 0.7845, "rewards/accuracies": 0.625, "rewards/chosen": -0.7550547122955322, "rewards/margins": 1.0219645500183105, "rewards/rejected": -1.7770192623138428, "step": 6331 }, { "epoch": 0.73, "learning_rate": 8.224277186000234e-08, "logits/chosen": -2.2251553535461426, "logits/rejected": -2.4125113487243652, "logps/chosen": -339.6291198730469, "logps/rejected": -382.272216796875, "loss": 0.2449, "rewards/accuracies": 0.875, "rewards/chosen": 0.20114058256149292, "rewards/margins": 2.482461452484131, "rewards/rejected": -2.281320810317993, "step": 6332 }, { "epoch": 0.73, "learning_rate": 8.220765539037808e-08, "logits/chosen": -3.0635986328125, "logits/rejected": -3.0098931789398193, "logps/chosen": -131.987548828125, "logps/rejected": -98.20221710205078, "loss": 0.4996, "rewards/accuracies": 0.625, "rewards/chosen": -0.1639045625925064, "rewards/margins": 0.680739164352417, "rewards/rejected": -0.844643771648407, "step": 6333 }, { "epoch": 0.73, "learning_rate": 8.217253892075382e-08, "logits/chosen": -3.476184844970703, "logits/rejected": -3.7759203910827637, "logps/chosen": -183.18801879882812, "logps/rejected": -241.10028076171875, "loss": 0.2953, "rewards/accuracies": 0.875, "rewards/chosen": 0.4520948529243469, "rewards/margins": 2.8170266151428223, "rewards/rejected": -2.364931583404541, "step": 6334 }, { "epoch": 0.73, "learning_rate": 8.213742245112959e-08, "logits/chosen": -2.7072935104370117, "logits/rejected": -2.904968500137329, "logps/chosen": -392.99102783203125, "logps/rejected": -366.7575378417969, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": 0.5494084358215332, "rewards/margins": 4.479907989501953, "rewards/rejected": -3.93049955368042, "step": 6335 }, { "epoch": 0.73, "learning_rate": 8.210230598150533e-08, "logits/chosen": -3.2992005348205566, "logits/rejected": -3.2719123363494873, "logps/chosen": -167.09947204589844, "logps/rejected": -208.82496643066406, "loss": 0.5886, "rewards/accuracies": 0.625, "rewards/chosen": -0.07910394668579102, "rewards/margins": 1.9716858863830566, "rewards/rejected": -2.0507898330688477, "step": 6336 }, { "epoch": 0.73, "learning_rate": 8.206718951188107e-08, "logits/chosen": -3.0594019889831543, "logits/rejected": -3.0872421264648438, "logps/chosen": -364.610107421875, "logps/rejected": -281.11517333984375, "loss": 0.5836, "rewards/accuracies": 0.625, "rewards/chosen": -0.2355513572692871, "rewards/margins": 0.9955876469612122, "rewards/rejected": -1.2311389446258545, "step": 6337 }, { "epoch": 0.73, "learning_rate": 8.203207304225681e-08, "logits/chosen": -3.19759464263916, "logits/rejected": -3.4111719131469727, "logps/chosen": -196.11135864257812, "logps/rejected": -205.60726928710938, "loss": 0.2528, "rewards/accuracies": 0.875, "rewards/chosen": 0.3736079931259155, "rewards/margins": 1.9612159729003906, "rewards/rejected": -1.5876080989837646, "step": 6338 }, { "epoch": 0.73, "learning_rate": 8.199695657263255e-08, "logits/chosen": -2.2560067176818848, "logits/rejected": -2.4120278358459473, "logps/chosen": -425.8804931640625, "logps/rejected": -339.73419189453125, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": -0.011721886694431305, "rewards/margins": 1.8477811813354492, "rewards/rejected": -1.859503149986267, "step": 6339 }, { "epoch": 0.73, "learning_rate": 8.196184010300832e-08, "logits/chosen": -2.738877534866333, "logits/rejected": -2.7374746799468994, "logps/chosen": -391.0930480957031, "logps/rejected": -288.0426025390625, "loss": 0.3969, "rewards/accuracies": 0.75, "rewards/chosen": 0.3270891308784485, "rewards/margins": 2.420353412628174, "rewards/rejected": -2.09326434135437, "step": 6340 }, { "epoch": 0.73, "learning_rate": 8.192672363338406e-08, "logits/chosen": -2.9948980808258057, "logits/rejected": -3.3099937438964844, "logps/chosen": -325.0084228515625, "logps/rejected": -379.2665710449219, "loss": 0.6689, "rewards/accuracies": 0.75, "rewards/chosen": 0.13226857781410217, "rewards/margins": 1.3249480724334717, "rewards/rejected": -1.192679524421692, "step": 6341 }, { "epoch": 0.73, "learning_rate": 8.18916071637598e-08, "logits/chosen": -2.9887402057647705, "logits/rejected": -2.799689769744873, "logps/chosen": -491.7427978515625, "logps/rejected": -280.3680114746094, "loss": 0.3918, "rewards/accuracies": 0.875, "rewards/chosen": 0.4385789930820465, "rewards/margins": 2.0152087211608887, "rewards/rejected": -1.5766297578811646, "step": 6342 }, { "epoch": 0.73, "learning_rate": 8.185649069413554e-08, "logits/chosen": -3.2631914615631104, "logits/rejected": -3.0676965713500977, "logps/chosen": -255.1527099609375, "logps/rejected": -267.9626770019531, "loss": 0.1692, "rewards/accuracies": 1.0, "rewards/chosen": 0.8432037234306335, "rewards/margins": 2.703885078430176, "rewards/rejected": -1.8606815338134766, "step": 6343 }, { "epoch": 0.73, "learning_rate": 8.182137422451129e-08, "logits/chosen": -2.6290388107299805, "logits/rejected": -2.8013126850128174, "logps/chosen": -267.56170654296875, "logps/rejected": -168.4673614501953, "loss": 0.1918, "rewards/accuracies": 1.0, "rewards/chosen": 0.24051152169704437, "rewards/margins": 2.336066246032715, "rewards/rejected": -2.095554828643799, "step": 6344 }, { "epoch": 0.73, "learning_rate": 8.178625775488703e-08, "logits/chosen": -3.4057188034057617, "logits/rejected": -3.060662269592285, "logps/chosen": -285.64752197265625, "logps/rejected": -297.5594482421875, "loss": 0.6255, "rewards/accuracies": 0.75, "rewards/chosen": -0.21215638518333435, "rewards/margins": 1.5438926219940186, "rewards/rejected": -1.7560489177703857, "step": 6345 }, { "epoch": 0.73, "learning_rate": 8.175114128526279e-08, "logits/chosen": -2.923799991607666, "logits/rejected": -2.5578091144561768, "logps/chosen": -213.54385375976562, "logps/rejected": -278.5721130371094, "loss": 0.3564, "rewards/accuracies": 0.75, "rewards/chosen": 0.034294985234737396, "rewards/margins": 1.345210075378418, "rewards/rejected": -1.3109149932861328, "step": 6346 }, { "epoch": 0.73, "learning_rate": 8.171602481563853e-08, "logits/chosen": -2.8355188369750977, "logits/rejected": -2.515604019165039, "logps/chosen": -199.46115112304688, "logps/rejected": -207.05599975585938, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": 0.18269848823547363, "rewards/margins": 2.8039393424987793, "rewards/rejected": -2.6212410926818848, "step": 6347 }, { "epoch": 0.73, "learning_rate": 8.168090834601428e-08, "logits/chosen": -3.6552932262420654, "logits/rejected": -3.697052001953125, "logps/chosen": -188.9609375, "logps/rejected": -122.27813720703125, "loss": 0.3203, "rewards/accuracies": 0.75, "rewards/chosen": 0.3424084484577179, "rewards/margins": 1.938812017440796, "rewards/rejected": -1.5964034795761108, "step": 6348 }, { "epoch": 0.73, "learning_rate": 8.164579187639002e-08, "logits/chosen": -1.9697914123535156, "logits/rejected": -2.1959214210510254, "logps/chosen": -286.42877197265625, "logps/rejected": -243.05398559570312, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": -0.06282326579093933, "rewards/margins": 1.7223454713821411, "rewards/rejected": -1.7851686477661133, "step": 6349 }, { "epoch": 0.73, "learning_rate": 8.161067540676576e-08, "logits/chosen": -3.0408599376678467, "logits/rejected": -2.951542377471924, "logps/chosen": -312.7063293457031, "logps/rejected": -380.89276123046875, "loss": 0.2314, "rewards/accuracies": 1.0, "rewards/chosen": 0.2843957245349884, "rewards/margins": 2.938615083694458, "rewards/rejected": -2.654219150543213, "step": 6350 }, { "epoch": 0.73, "learning_rate": 8.15755589371415e-08, "logits/chosen": -3.1710567474365234, "logits/rejected": -3.0091583728790283, "logps/chosen": -273.3824462890625, "logps/rejected": -155.568359375, "loss": 0.4274, "rewards/accuracies": 0.875, "rewards/chosen": -0.3847086727619171, "rewards/margins": 1.5609290599822998, "rewards/rejected": -1.945637822151184, "step": 6351 }, { "epoch": 0.73, "learning_rate": 8.154044246751727e-08, "logits/chosen": -2.915783405303955, "logits/rejected": -2.8454856872558594, "logps/chosen": -251.03933715820312, "logps/rejected": -370.4572448730469, "loss": 0.4402, "rewards/accuracies": 0.625, "rewards/chosen": 0.18023307621479034, "rewards/margins": 2.408940315246582, "rewards/rejected": -2.2287070751190186, "step": 6352 }, { "epoch": 0.73, "learning_rate": 8.150532599789301e-08, "logits/chosen": -3.021355390548706, "logits/rejected": -2.7353310585021973, "logps/chosen": -351.80126953125, "logps/rejected": -261.84014892578125, "loss": 0.1935, "rewards/accuracies": 1.0, "rewards/chosen": 0.15690863132476807, "rewards/margins": 3.200167417526245, "rewards/rejected": -3.0432591438293457, "step": 6353 }, { "epoch": 0.73, "learning_rate": 8.147020952826875e-08, "logits/chosen": -3.6495766639709473, "logits/rejected": -3.8161864280700684, "logps/chosen": -198.04107666015625, "logps/rejected": -185.9938507080078, "loss": 0.2707, "rewards/accuracies": 1.0, "rewards/chosen": 0.3888223171234131, "rewards/margins": 2.704798698425293, "rewards/rejected": -2.31597638130188, "step": 6354 }, { "epoch": 0.73, "learning_rate": 8.143509305864449e-08, "logits/chosen": -2.3097035884857178, "logits/rejected": -2.286344528198242, "logps/chosen": -194.91038513183594, "logps/rejected": -213.70880126953125, "loss": 0.4154, "rewards/accuracies": 0.875, "rewards/chosen": -0.24412240087985992, "rewards/margins": 1.9403396844863892, "rewards/rejected": -2.184462070465088, "step": 6355 }, { "epoch": 0.73, "learning_rate": 8.139997658902025e-08, "logits/chosen": -3.6603951454162598, "logits/rejected": -3.6158432960510254, "logps/chosen": -137.65428161621094, "logps/rejected": -138.30923461914062, "loss": 0.3607, "rewards/accuracies": 0.75, "rewards/chosen": 0.055985912680625916, "rewards/margins": 1.1378557682037354, "rewards/rejected": -1.0818698406219482, "step": 6356 }, { "epoch": 0.73, "learning_rate": 8.1364860119396e-08, "logits/chosen": -2.837092399597168, "logits/rejected": -2.816150426864624, "logps/chosen": -243.66888427734375, "logps/rejected": -175.4250030517578, "loss": 0.6395, "rewards/accuracies": 0.5, "rewards/chosen": -0.14669516682624817, "rewards/margins": 0.36867502331733704, "rewards/rejected": -0.5153701901435852, "step": 6357 }, { "epoch": 0.73, "learning_rate": 8.132974364977174e-08, "logits/chosen": -2.4531631469726562, "logits/rejected": -2.4560277462005615, "logps/chosen": -509.62567138671875, "logps/rejected": -264.672607421875, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.27339839935302734, "rewards/margins": 2.5294604301452637, "rewards/rejected": -2.2560620307922363, "step": 6358 }, { "epoch": 0.73, "learning_rate": 8.129462718014748e-08, "logits/chosen": -2.3350751399993896, "logits/rejected": -2.4873359203338623, "logps/chosen": -359.728271484375, "logps/rejected": -277.0501708984375, "loss": 0.6514, "rewards/accuracies": 0.5, "rewards/chosen": 0.019529804587364197, "rewards/margins": 0.603407621383667, "rewards/rejected": -0.5838778614997864, "step": 6359 }, { "epoch": 0.73, "learning_rate": 8.125951071052324e-08, "logits/chosen": -2.404452085494995, "logits/rejected": -2.1451528072357178, "logps/chosen": -253.50416564941406, "logps/rejected": -348.7359313964844, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": 0.37544316053390503, "rewards/margins": 2.4515695571899414, "rewards/rejected": -2.0761263370513916, "step": 6360 }, { "epoch": 0.73, "learning_rate": 8.122439424089898e-08, "logits/chosen": -2.7389893531799316, "logits/rejected": -2.730325698852539, "logps/chosen": -398.6631164550781, "logps/rejected": -269.17828369140625, "loss": 0.5243, "rewards/accuracies": 0.625, "rewards/chosen": -0.0642206072807312, "rewards/margins": 0.8052090406417847, "rewards/rejected": -0.8694296479225159, "step": 6361 }, { "epoch": 0.73, "learning_rate": 8.118927777127472e-08, "logits/chosen": -3.770435333251953, "logits/rejected": -3.221694231033325, "logps/chosen": -413.30889892578125, "logps/rejected": -252.68704223632812, "loss": 0.5221, "rewards/accuracies": 0.75, "rewards/chosen": -0.0518014132976532, "rewards/margins": 1.49700927734375, "rewards/rejected": -1.548810601234436, "step": 6362 }, { "epoch": 0.73, "learning_rate": 8.115416130165047e-08, "logits/chosen": -3.401925563812256, "logits/rejected": -3.8937439918518066, "logps/chosen": -242.93101501464844, "logps/rejected": -276.4593505859375, "loss": 0.1997, "rewards/accuracies": 0.875, "rewards/chosen": -0.004763960838317871, "rewards/margins": 3.1438703536987305, "rewards/rejected": -3.148634433746338, "step": 6363 }, { "epoch": 0.73, "learning_rate": 8.111904483202622e-08, "logits/chosen": -2.9693784713745117, "logits/rejected": -3.167632579803467, "logps/chosen": -197.57054138183594, "logps/rejected": -354.4634094238281, "loss": 0.2437, "rewards/accuracies": 1.0, "rewards/chosen": 0.07068327069282532, "rewards/margins": 2.051967144012451, "rewards/rejected": -1.9812836647033691, "step": 6364 }, { "epoch": 0.73, "learning_rate": 8.108392836240196e-08, "logits/chosen": -3.2029991149902344, "logits/rejected": -3.379465341567993, "logps/chosen": -256.3448181152344, "logps/rejected": -309.5802001953125, "loss": 0.1356, "rewards/accuracies": 1.0, "rewards/chosen": -0.007408337667584419, "rewards/margins": 2.6076111793518066, "rewards/rejected": -2.6150195598602295, "step": 6365 }, { "epoch": 0.73, "learning_rate": 8.10488118927777e-08, "logits/chosen": -2.960991859436035, "logits/rejected": -3.1886990070343018, "logps/chosen": -253.54054260253906, "logps/rejected": -251.45574951171875, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": -0.13899201154708862, "rewards/margins": 2.6622610092163086, "rewards/rejected": -2.801253080368042, "step": 6366 }, { "epoch": 0.73, "learning_rate": 8.101369542315345e-08, "logits/chosen": -2.7048802375793457, "logits/rejected": -2.8295600414276123, "logps/chosen": -174.927978515625, "logps/rejected": -171.82675170898438, "loss": 0.4775, "rewards/accuracies": 0.875, "rewards/chosen": 0.050238706171512604, "rewards/margins": 1.6084250211715698, "rewards/rejected": -1.5581862926483154, "step": 6367 }, { "epoch": 0.73, "learning_rate": 8.097857895352921e-08, "logits/chosen": -3.2393269538879395, "logits/rejected": -3.596400260925293, "logps/chosen": -113.79402160644531, "logps/rejected": -223.12271118164062, "loss": 0.1165, "rewards/accuracies": 1.0, "rewards/chosen": 0.3267463743686676, "rewards/margins": 3.569837808609009, "rewards/rejected": -3.243091106414795, "step": 6368 }, { "epoch": 0.73, "learning_rate": 8.094346248390495e-08, "logits/chosen": -4.098820209503174, "logits/rejected": -3.85150146484375, "logps/chosen": -284.1236877441406, "logps/rejected": -178.03887939453125, "loss": 0.3697, "rewards/accuracies": 0.75, "rewards/chosen": -0.18107455968856812, "rewards/margins": 1.7937403917312622, "rewards/rejected": -1.9748151302337646, "step": 6369 }, { "epoch": 0.73, "learning_rate": 8.09083460142807e-08, "logits/chosen": -3.7076416015625, "logits/rejected": -3.507441997528076, "logps/chosen": -326.6656799316406, "logps/rejected": -296.0179748535156, "loss": 0.4952, "rewards/accuracies": 0.875, "rewards/chosen": -0.3776654601097107, "rewards/margins": 1.8549624681472778, "rewards/rejected": -2.2326278686523438, "step": 6370 }, { "epoch": 0.73, "learning_rate": 8.087322954465643e-08, "logits/chosen": -2.939852237701416, "logits/rejected": -2.79557204246521, "logps/chosen": -407.7025451660156, "logps/rejected": -309.4299621582031, "loss": 0.4675, "rewards/accuracies": 0.75, "rewards/chosen": 0.17217525839805603, "rewards/margins": 1.0717875957489014, "rewards/rejected": -0.899612307548523, "step": 6371 }, { "epoch": 0.73, "learning_rate": 8.083811307503219e-08, "logits/chosen": -2.56084942817688, "logits/rejected": -2.915778398513794, "logps/chosen": -479.06134033203125, "logps/rejected": -342.6046447753906, "loss": 0.2155, "rewards/accuracies": 0.875, "rewards/chosen": -0.17251892387866974, "rewards/margins": 3.286240577697754, "rewards/rejected": -3.4587600231170654, "step": 6372 }, { "epoch": 0.73, "learning_rate": 8.080299660540793e-08, "logits/chosen": -2.6991729736328125, "logits/rejected": -2.800914764404297, "logps/chosen": -164.33297729492188, "logps/rejected": -225.2251434326172, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": 0.022231578826904297, "rewards/margins": 2.265775203704834, "rewards/rejected": -2.2435436248779297, "step": 6373 }, { "epoch": 0.73, "learning_rate": 8.076788013578368e-08, "logits/chosen": -3.3584461212158203, "logits/rejected": -3.167689800262451, "logps/chosen": -242.91632080078125, "logps/rejected": -159.3011932373047, "loss": 0.252, "rewards/accuracies": 1.0, "rewards/chosen": 0.1409316509962082, "rewards/margins": 2.01949405670166, "rewards/rejected": -1.8785622119903564, "step": 6374 }, { "epoch": 0.73, "learning_rate": 8.073276366615942e-08, "logits/chosen": -3.6896262168884277, "logits/rejected": -3.4929118156433105, "logps/chosen": -234.193359375, "logps/rejected": -301.0379638671875, "loss": 0.7213, "rewards/accuracies": 0.625, "rewards/chosen": -0.736250638961792, "rewards/margins": 1.606673002243042, "rewards/rejected": -2.342923641204834, "step": 6375 }, { "epoch": 0.74, "learning_rate": 8.069764719653518e-08, "logits/chosen": -3.1718246936798096, "logits/rejected": -3.4381251335144043, "logps/chosen": -84.1958999633789, "logps/rejected": -140.40133666992188, "loss": 0.313, "rewards/accuracies": 0.875, "rewards/chosen": -0.017517946660518646, "rewards/margins": 1.9749609231948853, "rewards/rejected": -1.992478847503662, "step": 6376 }, { "epoch": 0.74, "learning_rate": 8.066253072691092e-08, "logits/chosen": -3.036738872528076, "logits/rejected": -2.881462574005127, "logps/chosen": -572.7735595703125, "logps/rejected": -326.032958984375, "loss": 0.2442, "rewards/accuracies": 1.0, "rewards/chosen": 0.35994771122932434, "rewards/margins": 2.322282075881958, "rewards/rejected": -1.962334394454956, "step": 6377 }, { "epoch": 0.74, "learning_rate": 8.062741425728666e-08, "logits/chosen": -3.245387077331543, "logits/rejected": -2.988888740539551, "logps/chosen": -232.01620483398438, "logps/rejected": -214.65536499023438, "loss": 0.3768, "rewards/accuracies": 1.0, "rewards/chosen": 0.5159415006637573, "rewards/margins": 1.1996166706085205, "rewards/rejected": -0.6836751103401184, "step": 6378 }, { "epoch": 0.74, "learning_rate": 8.05922977876624e-08, "logits/chosen": -2.6346516609191895, "logits/rejected": -2.4973678588867188, "logps/chosen": -314.97100830078125, "logps/rejected": -259.6939392089844, "loss": 0.4411, "rewards/accuracies": 0.75, "rewards/chosen": 0.13622024655342102, "rewards/margins": 1.2620465755462646, "rewards/rejected": -1.125826358795166, "step": 6379 }, { "epoch": 0.74, "learning_rate": 8.055718131803817e-08, "logits/chosen": -3.396212100982666, "logits/rejected": -3.045229196548462, "logps/chosen": -235.3717803955078, "logps/rejected": -196.712646484375, "loss": 0.4741, "rewards/accuracies": 0.625, "rewards/chosen": 0.03689885139465332, "rewards/margins": 1.4741908311843872, "rewards/rejected": -1.4372919797897339, "step": 6380 }, { "epoch": 0.74, "learning_rate": 8.052206484841391e-08, "logits/chosen": -3.6933372020721436, "logits/rejected": -3.816805601119995, "logps/chosen": -140.7845458984375, "logps/rejected": -227.11376953125, "loss": 0.4645, "rewards/accuracies": 0.875, "rewards/chosen": -0.6311870813369751, "rewards/margins": 1.0784900188446045, "rewards/rejected": -1.7096772193908691, "step": 6381 }, { "epoch": 0.74, "learning_rate": 8.048694837878965e-08, "logits/chosen": -3.1431894302368164, "logits/rejected": -2.702505588531494, "logps/chosen": -240.70468139648438, "logps/rejected": -204.72630310058594, "loss": 0.2977, "rewards/accuracies": 0.875, "rewards/chosen": 0.1493338793516159, "rewards/margins": 1.5567643642425537, "rewards/rejected": -1.4074304103851318, "step": 6382 }, { "epoch": 0.74, "learning_rate": 8.045183190916539e-08, "logits/chosen": -3.2592625617980957, "logits/rejected": -2.8733367919921875, "logps/chosen": -236.16873168945312, "logps/rejected": -240.5750732421875, "loss": 0.3622, "rewards/accuracies": 0.875, "rewards/chosen": -0.1355106234550476, "rewards/margins": 2.313220500946045, "rewards/rejected": -2.4487311840057373, "step": 6383 }, { "epoch": 0.74, "learning_rate": 8.041671543954113e-08, "logits/chosen": -3.474336862564087, "logits/rejected": -3.057342290878296, "logps/chosen": -191.15052795410156, "logps/rejected": -261.76080322265625, "loss": 0.2824, "rewards/accuracies": 0.875, "rewards/chosen": -0.41848063468933105, "rewards/margins": 1.907273292541504, "rewards/rejected": -2.325753927230835, "step": 6384 }, { "epoch": 0.74, "learning_rate": 8.03815989699169e-08, "logits/chosen": -3.7758278846740723, "logits/rejected": -3.2835068702697754, "logps/chosen": -217.2694091796875, "logps/rejected": -178.60794067382812, "loss": 0.4247, "rewards/accuracies": 0.75, "rewards/chosen": -0.5022034049034119, "rewards/margins": 1.6934077739715576, "rewards/rejected": -2.1956112384796143, "step": 6385 }, { "epoch": 0.74, "learning_rate": 8.034648250029264e-08, "logits/chosen": -3.574932098388672, "logits/rejected": -4.089390754699707, "logps/chosen": -182.7436981201172, "logps/rejected": -238.77569580078125, "loss": 0.1871, "rewards/accuracies": 1.0, "rewards/chosen": -0.3667198419570923, "rewards/margins": 2.95084547996521, "rewards/rejected": -3.317565441131592, "step": 6386 }, { "epoch": 0.74, "learning_rate": 8.031136603066838e-08, "logits/chosen": -2.1259918212890625, "logits/rejected": -2.2059555053710938, "logps/chosen": -347.5075378417969, "logps/rejected": -248.9064178466797, "loss": 0.3945, "rewards/accuracies": 0.75, "rewards/chosen": 0.4463377296924591, "rewards/margins": 1.2772406339645386, "rewards/rejected": -0.8309028744697571, "step": 6387 }, { "epoch": 0.74, "learning_rate": 8.027624956104412e-08, "logits/chosen": -2.9886698722839355, "logits/rejected": -3.0674943923950195, "logps/chosen": -436.2943115234375, "logps/rejected": -410.9330749511719, "loss": 0.4561, "rewards/accuracies": 0.875, "rewards/chosen": -0.5100390315055847, "rewards/margins": 1.3337712287902832, "rewards/rejected": -1.8438104391098022, "step": 6388 }, { "epoch": 0.74, "learning_rate": 8.024113309141987e-08, "logits/chosen": -3.5982842445373535, "logits/rejected": -3.6389830112457275, "logps/chosen": -112.19334411621094, "logps/rejected": -143.26820373535156, "loss": 0.7685, "rewards/accuracies": 0.75, "rewards/chosen": -1.029625654220581, "rewards/margins": 0.5360532402992249, "rewards/rejected": -1.5656788349151611, "step": 6389 }, { "epoch": 0.74, "learning_rate": 8.020601662179561e-08, "logits/chosen": -2.8903892040252686, "logits/rejected": -2.902412176132202, "logps/chosen": -201.92568969726562, "logps/rejected": -229.05555725097656, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": 0.49130260944366455, "rewards/margins": 3.060708522796631, "rewards/rejected": -2.5694057941436768, "step": 6390 }, { "epoch": 0.74, "learning_rate": 8.017090015217137e-08, "logits/chosen": -3.1453559398651123, "logits/rejected": -2.9990971088409424, "logps/chosen": -344.6080322265625, "logps/rejected": -387.9911193847656, "loss": 0.5832, "rewards/accuracies": 0.625, "rewards/chosen": 0.015421777963638306, "rewards/margins": 1.2975811958312988, "rewards/rejected": -1.2821593284606934, "step": 6391 }, { "epoch": 0.74, "learning_rate": 8.013578368254711e-08, "logits/chosen": -3.3820130825042725, "logits/rejected": -3.2194957733154297, "logps/chosen": -256.8796081542969, "logps/rejected": -308.796875, "loss": 0.317, "rewards/accuracies": 0.875, "rewards/chosen": 0.024566650390625, "rewards/margins": 2.9478092193603516, "rewards/rejected": -2.9232425689697266, "step": 6392 }, { "epoch": 0.74, "learning_rate": 8.010066721292286e-08, "logits/chosen": -3.1914281845092773, "logits/rejected": -3.6177053451538086, "logps/chosen": -222.4288330078125, "logps/rejected": -325.9631042480469, "loss": 0.2631, "rewards/accuracies": 1.0, "rewards/chosen": 0.6532437205314636, "rewards/margins": 2.415058135986328, "rewards/rejected": -1.7618142366409302, "step": 6393 }, { "epoch": 0.74, "learning_rate": 8.00655507432986e-08, "logits/chosen": -3.3823838233947754, "logits/rejected": -3.2002811431884766, "logps/chosen": -358.5682373046875, "logps/rejected": -311.07305908203125, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": -0.2525482177734375, "rewards/margins": 1.7154912948608398, "rewards/rejected": -1.9680395126342773, "step": 6394 }, { "epoch": 0.74, "learning_rate": 8.003043427367434e-08, "logits/chosen": -2.5339107513427734, "logits/rejected": -2.720278024673462, "logps/chosen": -160.11904907226562, "logps/rejected": -142.06956481933594, "loss": 0.3725, "rewards/accuracies": 0.75, "rewards/chosen": -0.16557712852954865, "rewards/margins": 1.1674367189407349, "rewards/rejected": -1.333013892173767, "step": 6395 }, { "epoch": 0.74, "learning_rate": 7.999531780405008e-08, "logits/chosen": -2.83575439453125, "logits/rejected": -2.7528390884399414, "logps/chosen": -145.2420654296875, "logps/rejected": -200.6960906982422, "loss": 0.4333, "rewards/accuracies": 0.875, "rewards/chosen": -0.21539145708084106, "rewards/margins": 1.1188607215881348, "rewards/rejected": -1.3342522382736206, "step": 6396 }, { "epoch": 0.74, "learning_rate": 7.996020133442585e-08, "logits/chosen": -2.6903083324432373, "logits/rejected": -2.676698923110962, "logps/chosen": -214.57102966308594, "logps/rejected": -193.89443969726562, "loss": 0.3035, "rewards/accuracies": 0.875, "rewards/chosen": 0.12050998210906982, "rewards/margins": 2.1191534996032715, "rewards/rejected": -1.998643398284912, "step": 6397 }, { "epoch": 0.74, "learning_rate": 7.992508486480159e-08, "logits/chosen": -3.0630149841308594, "logits/rejected": -3.0034971237182617, "logps/chosen": -275.3714904785156, "logps/rejected": -309.33416748046875, "loss": 0.3386, "rewards/accuracies": 0.75, "rewards/chosen": -0.0684414803981781, "rewards/margins": 1.2675408124923706, "rewards/rejected": -1.335982322692871, "step": 6398 }, { "epoch": 0.74, "learning_rate": 7.988996839517733e-08, "logits/chosen": -2.747711658477783, "logits/rejected": -2.778618812561035, "logps/chosen": -254.91064453125, "logps/rejected": -202.17062377929688, "loss": 0.6683, "rewards/accuracies": 0.5, "rewards/chosen": -0.39809221029281616, "rewards/margins": 1.691719651222229, "rewards/rejected": -2.0898118019104004, "step": 6399 }, { "epoch": 0.74, "learning_rate": 7.985485192555307e-08, "logits/chosen": -2.94149112701416, "logits/rejected": -3.0213146209716797, "logps/chosen": -267.23883056640625, "logps/rejected": -187.97970581054688, "loss": 0.3598, "rewards/accuracies": 0.875, "rewards/chosen": 0.06932765245437622, "rewards/margins": 0.9977157711982727, "rewards/rejected": -0.9283881187438965, "step": 6400 }, { "epoch": 0.74, "learning_rate": 7.981973545592884e-08, "logits/chosen": -2.706113815307617, "logits/rejected": -2.74666690826416, "logps/chosen": -166.17848205566406, "logps/rejected": -272.041015625, "loss": 0.631, "rewards/accuracies": 0.75, "rewards/chosen": 0.004582107067108154, "rewards/margins": 1.252347469329834, "rewards/rejected": -1.247765302658081, "step": 6401 }, { "epoch": 0.74, "learning_rate": 7.978461898630458e-08, "logits/chosen": -3.3254823684692383, "logits/rejected": -3.224916696548462, "logps/chosen": -372.8643798828125, "logps/rejected": -328.0347900390625, "loss": 0.7962, "rewards/accuracies": 0.75, "rewards/chosen": -0.6515939235687256, "rewards/margins": 1.082826852798462, "rewards/rejected": -1.7344207763671875, "step": 6402 }, { "epoch": 0.74, "learning_rate": 7.974950251668032e-08, "logits/chosen": -2.6197896003723145, "logits/rejected": -2.4741783142089844, "logps/chosen": -217.13755798339844, "logps/rejected": -139.45465087890625, "loss": 0.3904, "rewards/accuracies": 1.0, "rewards/chosen": 0.27704551815986633, "rewards/margins": 0.9774760603904724, "rewards/rejected": -0.7004304528236389, "step": 6403 }, { "epoch": 0.74, "learning_rate": 7.971438604705606e-08, "logits/chosen": -3.1816813945770264, "logits/rejected": -3.1724586486816406, "logps/chosen": -219.45481872558594, "logps/rejected": -236.34559631347656, "loss": 0.4211, "rewards/accuracies": 0.75, "rewards/chosen": -0.2791505753993988, "rewards/margins": 2.07902193069458, "rewards/rejected": -2.3581724166870117, "step": 6404 }, { "epoch": 0.74, "learning_rate": 7.967926957743181e-08, "logits/chosen": -2.437246322631836, "logits/rejected": -2.4722900390625, "logps/chosen": -303.12457275390625, "logps/rejected": -240.4605712890625, "loss": 0.5546, "rewards/accuracies": 0.875, "rewards/chosen": 0.0028901025652885437, "rewards/margins": 1.148919701576233, "rewards/rejected": -1.1460297107696533, "step": 6405 }, { "epoch": 0.74, "learning_rate": 7.964415310780756e-08, "logits/chosen": -3.187304973602295, "logits/rejected": -3.1214237213134766, "logps/chosen": -171.21070861816406, "logps/rejected": -178.51560974121094, "loss": 0.3804, "rewards/accuracies": 0.75, "rewards/chosen": 0.20186583697795868, "rewards/margins": 1.6068201065063477, "rewards/rejected": -1.4049540758132935, "step": 6406 }, { "epoch": 0.74, "learning_rate": 7.96090366381833e-08, "logits/chosen": -2.596289873123169, "logits/rejected": -2.517672538757324, "logps/chosen": -286.5695495605469, "logps/rejected": -233.165771484375, "loss": 0.4037, "rewards/accuracies": 0.75, "rewards/chosen": -0.4390692710876465, "rewards/margins": 1.5821290016174316, "rewards/rejected": -2.021198272705078, "step": 6407 }, { "epoch": 0.74, "learning_rate": 7.957392016855905e-08, "logits/chosen": -3.241090774536133, "logits/rejected": -3.4115002155303955, "logps/chosen": -328.1256408691406, "logps/rejected": -226.6127166748047, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": -0.8321160674095154, "rewards/margins": 2.0095484256744385, "rewards/rejected": -2.8416645526885986, "step": 6408 }, { "epoch": 0.74, "learning_rate": 7.95388036989348e-08, "logits/chosen": -3.1972084045410156, "logits/rejected": -3.244002342224121, "logps/chosen": -275.59259033203125, "logps/rejected": -284.1604919433594, "loss": 0.2857, "rewards/accuracies": 0.875, "rewards/chosen": 0.18991619348526, "rewards/margins": 1.8637348413467407, "rewards/rejected": -1.673818588256836, "step": 6409 }, { "epoch": 0.74, "learning_rate": 7.950368722931054e-08, "logits/chosen": -2.7098658084869385, "logits/rejected": -2.969816207885742, "logps/chosen": -268.836669921875, "logps/rejected": -179.5952911376953, "loss": 0.4366, "rewards/accuracies": 0.75, "rewards/chosen": 0.03782980516552925, "rewards/margins": 1.3893709182739258, "rewards/rejected": -1.3515410423278809, "step": 6410 }, { "epoch": 0.74, "learning_rate": 7.946857075968628e-08, "logits/chosen": -2.1057214736938477, "logits/rejected": -2.249715805053711, "logps/chosen": -159.45518493652344, "logps/rejected": -113.3492431640625, "loss": 0.6113, "rewards/accuracies": 0.625, "rewards/chosen": -0.3513216972351074, "rewards/margins": 0.49614864587783813, "rewards/rejected": -0.8474702835083008, "step": 6411 }, { "epoch": 0.74, "learning_rate": 7.943345429006203e-08, "logits/chosen": -3.3374617099761963, "logits/rejected": -3.3425800800323486, "logps/chosen": -297.6021423339844, "logps/rejected": -257.83013916015625, "loss": 0.3108, "rewards/accuracies": 0.875, "rewards/chosen": -0.13965582847595215, "rewards/margins": 2.656076669692993, "rewards/rejected": -2.7957327365875244, "step": 6412 }, { "epoch": 0.74, "learning_rate": 7.939833782043779e-08, "logits/chosen": -3.2378180027008057, "logits/rejected": -3.326991081237793, "logps/chosen": -387.8145751953125, "logps/rejected": -380.1577453613281, "loss": 0.1679, "rewards/accuracies": 1.0, "rewards/chosen": 0.16405567526817322, "rewards/margins": 2.832596778869629, "rewards/rejected": -2.6685409545898438, "step": 6413 }, { "epoch": 0.74, "learning_rate": 7.936322135081353e-08, "logits/chosen": -3.245939254760742, "logits/rejected": -3.8335585594177246, "logps/chosen": -207.14767456054688, "logps/rejected": -346.48956298828125, "loss": 0.1937, "rewards/accuracies": 1.0, "rewards/chosen": -0.093332439661026, "rewards/margins": 2.6958625316619873, "rewards/rejected": -2.7891950607299805, "step": 6414 }, { "epoch": 0.74, "learning_rate": 7.932810488118927e-08, "logits/chosen": -3.906020402908325, "logits/rejected": -3.6584229469299316, "logps/chosen": -187.86448669433594, "logps/rejected": -133.4293670654297, "loss": 0.4861, "rewards/accuracies": 0.75, "rewards/chosen": -0.5021368861198425, "rewards/margins": 0.7564477920532227, "rewards/rejected": -1.2585844993591309, "step": 6415 }, { "epoch": 0.74, "learning_rate": 7.929298841156501e-08, "logits/chosen": -3.4207942485809326, "logits/rejected": -3.3908863067626953, "logps/chosen": -252.47952270507812, "logps/rejected": -259.82464599609375, "loss": 0.3399, "rewards/accuracies": 0.875, "rewards/chosen": -0.07614961266517639, "rewards/margins": 2.7489774227142334, "rewards/rejected": -2.825127124786377, "step": 6416 }, { "epoch": 0.74, "learning_rate": 7.925787194194077e-08, "logits/chosen": -3.124948501586914, "logits/rejected": -2.9276134967803955, "logps/chosen": -292.0642395019531, "logps/rejected": -355.5625305175781, "loss": 0.4231, "rewards/accuracies": 0.875, "rewards/chosen": -0.2036464512348175, "rewards/margins": 1.5435770750045776, "rewards/rejected": -1.7472234964370728, "step": 6417 }, { "epoch": 0.74, "learning_rate": 7.922275547231652e-08, "logits/chosen": -2.600986957550049, "logits/rejected": -2.8939731121063232, "logps/chosen": -308.0492858886719, "logps/rejected": -184.551513671875, "loss": 0.5644, "rewards/accuracies": 0.75, "rewards/chosen": -0.4691298007965088, "rewards/margins": 1.3447052240371704, "rewards/rejected": -1.8138350248336792, "step": 6418 }, { "epoch": 0.74, "learning_rate": 7.918763900269226e-08, "logits/chosen": -2.6393394470214844, "logits/rejected": -2.6255719661712646, "logps/chosen": -412.720947265625, "logps/rejected": -305.37445068359375, "loss": 0.4452, "rewards/accuracies": 0.75, "rewards/chosen": 0.8761891722679138, "rewards/margins": 1.6873996257781982, "rewards/rejected": -0.8112105131149292, "step": 6419 }, { "epoch": 0.74, "learning_rate": 7.9152522533068e-08, "logits/chosen": -3.1580066680908203, "logits/rejected": -3.27726674079895, "logps/chosen": -212.27728271484375, "logps/rejected": -212.1817169189453, "loss": 0.445, "rewards/accuracies": 0.875, "rewards/chosen": -0.854422390460968, "rewards/margins": 1.4548239707946777, "rewards/rejected": -2.30924654006958, "step": 6420 }, { "epoch": 0.74, "learning_rate": 7.911740606344376e-08, "logits/chosen": -3.57234263420105, "logits/rejected": -3.3071887493133545, "logps/chosen": -341.68768310546875, "logps/rejected": -379.4610290527344, "loss": 0.5278, "rewards/accuracies": 0.625, "rewards/chosen": -0.29412856698036194, "rewards/margins": 1.1350258588790894, "rewards/rejected": -1.429154396057129, "step": 6421 }, { "epoch": 0.74, "learning_rate": 7.90822895938195e-08, "logits/chosen": -3.5060548782348633, "logits/rejected": -3.3839635848999023, "logps/chosen": -280.2442626953125, "logps/rejected": -234.03265380859375, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": 0.43068408966064453, "rewards/margins": 1.528872013092041, "rewards/rejected": -1.0981879234313965, "step": 6422 }, { "epoch": 0.74, "learning_rate": 7.904717312419524e-08, "logits/chosen": -3.278953790664673, "logits/rejected": -3.2084038257598877, "logps/chosen": -144.41165161132812, "logps/rejected": -248.35287475585938, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": 0.3198094367980957, "rewards/margins": 0.879848301410675, "rewards/rejected": -0.5600388646125793, "step": 6423 }, { "epoch": 0.74, "learning_rate": 7.901205665457098e-08, "logits/chosen": -3.436493158340454, "logits/rejected": -2.995820999145508, "logps/chosen": -600.589111328125, "logps/rejected": -253.78677368164062, "loss": 0.3408, "rewards/accuracies": 0.875, "rewards/chosen": -0.5846751928329468, "rewards/margins": 1.6193628311157227, "rewards/rejected": -2.204037666320801, "step": 6424 }, { "epoch": 0.74, "learning_rate": 7.897694018494675e-08, "logits/chosen": -3.365835428237915, "logits/rejected": -3.244309425354004, "logps/chosen": -102.35816955566406, "logps/rejected": -141.8848114013672, "loss": 0.3855, "rewards/accuracies": 0.875, "rewards/chosen": -0.35344141721725464, "rewards/margins": 1.7179454565048218, "rewards/rejected": -2.0713868141174316, "step": 6425 }, { "epoch": 0.74, "learning_rate": 7.894182371532249e-08, "logits/chosen": -2.6167845726013184, "logits/rejected": -2.8456854820251465, "logps/chosen": -114.60612487792969, "logps/rejected": -258.79095458984375, "loss": 0.2206, "rewards/accuracies": 0.875, "rewards/chosen": 0.11766412854194641, "rewards/margins": 2.6881935596466064, "rewards/rejected": -2.5705294609069824, "step": 6426 }, { "epoch": 0.74, "learning_rate": 7.890670724569823e-08, "logits/chosen": -3.733285427093506, "logits/rejected": -3.630037546157837, "logps/chosen": -181.77764892578125, "logps/rejected": -130.56777954101562, "loss": 0.3525, "rewards/accuracies": 1.0, "rewards/chosen": 0.19714099168777466, "rewards/margins": 1.4447553157806396, "rewards/rejected": -1.2476142644882202, "step": 6427 }, { "epoch": 0.74, "learning_rate": 7.887159077607397e-08, "logits/chosen": -3.0915865898132324, "logits/rejected": -3.127490997314453, "logps/chosen": -189.06521606445312, "logps/rejected": -222.28404235839844, "loss": 0.3588, "rewards/accuracies": 0.75, "rewards/chosen": -0.2025388777256012, "rewards/margins": 1.9891682863235474, "rewards/rejected": -2.191707134246826, "step": 6428 }, { "epoch": 0.74, "learning_rate": 7.883647430644973e-08, "logits/chosen": -2.939854621887207, "logits/rejected": -2.994647979736328, "logps/chosen": -331.1605224609375, "logps/rejected": -325.30084228515625, "loss": 0.4809, "rewards/accuracies": 0.875, "rewards/chosen": -0.39373812079429626, "rewards/margins": 1.2941648960113525, "rewards/rejected": -1.6879029273986816, "step": 6429 }, { "epoch": 0.74, "learning_rate": 7.880135783682548e-08, "logits/chosen": -3.0066096782684326, "logits/rejected": -2.836583137512207, "logps/chosen": -328.8945617675781, "logps/rejected": -418.656494140625, "loss": 0.5088, "rewards/accuracies": 0.875, "rewards/chosen": -0.0811169296503067, "rewards/margins": 1.0951902866363525, "rewards/rejected": -1.176307201385498, "step": 6430 }, { "epoch": 0.74, "learning_rate": 7.876624136720122e-08, "logits/chosen": -2.556809902191162, "logits/rejected": -2.7570528984069824, "logps/chosen": -265.0262451171875, "logps/rejected": -257.71258544921875, "loss": 0.3842, "rewards/accuracies": 0.75, "rewards/chosen": 0.25023895502090454, "rewards/margins": 1.2042670249938965, "rewards/rejected": -0.9540281891822815, "step": 6431 }, { "epoch": 0.74, "learning_rate": 7.873112489757696e-08, "logits/chosen": -3.328857898712158, "logits/rejected": -3.3412721157073975, "logps/chosen": -137.2555389404297, "logps/rejected": -260.1296081542969, "loss": 0.1855, "rewards/accuracies": 0.875, "rewards/chosen": 0.46027398109436035, "rewards/margins": 3.091370105743408, "rewards/rejected": -2.631096363067627, "step": 6432 }, { "epoch": 0.74, "learning_rate": 7.86960084279527e-08, "logits/chosen": -3.1151885986328125, "logits/rejected": -3.8100008964538574, "logps/chosen": -122.67564392089844, "logps/rejected": -138.04373168945312, "loss": 0.5859, "rewards/accuracies": 0.625, "rewards/chosen": -0.6028680801391602, "rewards/margins": 0.7388539910316467, "rewards/rejected": -1.3417221307754517, "step": 6433 }, { "epoch": 0.74, "learning_rate": 7.866089195832845e-08, "logits/chosen": -3.106966972351074, "logits/rejected": -3.1327462196350098, "logps/chosen": -303.5438537597656, "logps/rejected": -318.565185546875, "loss": 0.5153, "rewards/accuracies": 0.625, "rewards/chosen": 0.22139447927474976, "rewards/margins": 1.160252571105957, "rewards/rejected": -0.938857913017273, "step": 6434 }, { "epoch": 0.74, "learning_rate": 7.86257754887042e-08, "logits/chosen": -2.7859878540039062, "logits/rejected": -2.6443378925323486, "logps/chosen": -368.04296875, "logps/rejected": -341.12554931640625, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": 0.367580771446228, "rewards/margins": 2.7012882232666016, "rewards/rejected": -2.333707332611084, "step": 6435 }, { "epoch": 0.74, "learning_rate": 7.859065901907995e-08, "logits/chosen": -3.4898462295532227, "logits/rejected": -3.5386297702789307, "logps/chosen": -222.08287048339844, "logps/rejected": -194.33514404296875, "loss": 0.2954, "rewards/accuracies": 1.0, "rewards/chosen": 0.6948533654212952, "rewards/margins": 1.697340726852417, "rewards/rejected": -1.0024874210357666, "step": 6436 }, { "epoch": 0.74, "learning_rate": 7.855554254945569e-08, "logits/chosen": -3.062453031539917, "logits/rejected": -3.28981876373291, "logps/chosen": -184.9267578125, "logps/rejected": -275.02825927734375, "loss": 0.385, "rewards/accuracies": 0.875, "rewards/chosen": -0.35631585121154785, "rewards/margins": 1.3602895736694336, "rewards/rejected": -1.7166054248809814, "step": 6437 }, { "epoch": 0.74, "learning_rate": 7.852042607983144e-08, "logits/chosen": -2.796786308288574, "logits/rejected": -2.7710378170013428, "logps/chosen": -197.33758544921875, "logps/rejected": -208.72271728515625, "loss": 0.6486, "rewards/accuracies": 0.75, "rewards/chosen": -0.7828726768493652, "rewards/margins": 1.1915662288665771, "rewards/rejected": -1.974439024925232, "step": 6438 }, { "epoch": 0.74, "learning_rate": 7.848530961020718e-08, "logits/chosen": -2.8964333534240723, "logits/rejected": -2.9952995777130127, "logps/chosen": -296.0469970703125, "logps/rejected": -177.16619873046875, "loss": 0.613, "rewards/accuracies": 0.5, "rewards/chosen": -0.7515465021133423, "rewards/margins": 0.48830538988113403, "rewards/rejected": -1.2398518323898315, "step": 6439 }, { "epoch": 0.74, "learning_rate": 7.845019314058292e-08, "logits/chosen": -2.991663932800293, "logits/rejected": -2.755199670791626, "logps/chosen": -169.64254760742188, "logps/rejected": -281.62921142578125, "loss": 0.5176, "rewards/accuracies": 0.625, "rewards/chosen": -0.24918991327285767, "rewards/margins": 1.3706790208816528, "rewards/rejected": -1.6198689937591553, "step": 6440 }, { "epoch": 0.74, "learning_rate": 7.841507667095866e-08, "logits/chosen": -2.790821075439453, "logits/rejected": -2.8970396518707275, "logps/chosen": -342.53814697265625, "logps/rejected": -256.77569580078125, "loss": 0.5321, "rewards/accuracies": 0.75, "rewards/chosen": -0.12828460335731506, "rewards/margins": 2.112487316131592, "rewards/rejected": -2.240771770477295, "step": 6441 }, { "epoch": 0.74, "learning_rate": 7.837996020133443e-08, "logits/chosen": -2.9871037006378174, "logits/rejected": -3.110673189163208, "logps/chosen": -242.28240966796875, "logps/rejected": -267.92486572265625, "loss": 0.2266, "rewards/accuracies": 0.875, "rewards/chosen": 0.33631861209869385, "rewards/margins": 2.2164556980133057, "rewards/rejected": -1.8801369667053223, "step": 6442 }, { "epoch": 0.74, "learning_rate": 7.834484373171017e-08, "logits/chosen": -3.826474666595459, "logits/rejected": -3.6512093544006348, "logps/chosen": -154.7283172607422, "logps/rejected": -178.46890258789062, "loss": 0.3323, "rewards/accuracies": 0.875, "rewards/chosen": -0.051804088056087494, "rewards/margins": 1.9189543724060059, "rewards/rejected": -1.9707584381103516, "step": 6443 }, { "epoch": 0.74, "learning_rate": 7.830972726208591e-08, "logits/chosen": -2.5595662593841553, "logits/rejected": -2.4383225440979004, "logps/chosen": -205.67005920410156, "logps/rejected": -321.8591003417969, "loss": 0.3382, "rewards/accuracies": 0.75, "rewards/chosen": 0.03614449501037598, "rewards/margins": 2.0067455768585205, "rewards/rejected": -1.970601201057434, "step": 6444 }, { "epoch": 0.74, "learning_rate": 7.827461079246165e-08, "logits/chosen": -2.9525718688964844, "logits/rejected": -2.8749959468841553, "logps/chosen": -196.96963500976562, "logps/rejected": -239.5850830078125, "loss": 0.6275, "rewards/accuracies": 0.75, "rewards/chosen": -0.14940962195396423, "rewards/margins": 0.7104698419570923, "rewards/rejected": -0.8598795533180237, "step": 6445 }, { "epoch": 0.74, "learning_rate": 7.823949432283742e-08, "logits/chosen": -2.9717981815338135, "logits/rejected": -2.896757125854492, "logps/chosen": -242.0891571044922, "logps/rejected": -261.220458984375, "loss": 0.1385, "rewards/accuracies": 0.875, "rewards/chosen": 0.5329132080078125, "rewards/margins": 3.6137008666992188, "rewards/rejected": -3.0807876586914062, "step": 6446 }, { "epoch": 0.74, "learning_rate": 7.820437785321316e-08, "logits/chosen": -2.7301371097564697, "logits/rejected": -2.7544052600860596, "logps/chosen": -308.7724609375, "logps/rejected": -376.7019958496094, "loss": 0.33, "rewards/accuracies": 0.75, "rewards/chosen": 0.16255971789360046, "rewards/margins": 3.549356460571289, "rewards/rejected": -3.386796474456787, "step": 6447 }, { "epoch": 0.74, "learning_rate": 7.81692613835889e-08, "logits/chosen": -3.586872100830078, "logits/rejected": -3.6079599857330322, "logps/chosen": -224.94430541992188, "logps/rejected": -261.7461853027344, "loss": 0.294, "rewards/accuracies": 0.875, "rewards/chosen": -0.4623001217842102, "rewards/margins": 2.1842904090881348, "rewards/rejected": -2.6465904712677, "step": 6448 }, { "epoch": 0.74, "learning_rate": 7.813414491396464e-08, "logits/chosen": -2.221890926361084, "logits/rejected": -2.5076801776885986, "logps/chosen": -368.9783020019531, "logps/rejected": -338.59368896484375, "loss": 0.2227, "rewards/accuracies": 0.875, "rewards/chosen": 0.3380822539329529, "rewards/margins": 2.8566441535949707, "rewards/rejected": -2.518561840057373, "step": 6449 }, { "epoch": 0.74, "learning_rate": 7.80990284443404e-08, "logits/chosen": -2.3335511684417725, "logits/rejected": -2.4149727821350098, "logps/chosen": -363.0848388671875, "logps/rejected": -289.3457946777344, "loss": 0.4195, "rewards/accuracies": 0.875, "rewards/chosen": -0.2778128683567047, "rewards/margins": 2.289132833480835, "rewards/rejected": -2.566945791244507, "step": 6450 }, { "epoch": 0.74, "learning_rate": 7.806391197471613e-08, "logits/chosen": -3.797441005706787, "logits/rejected": -3.7945451736450195, "logps/chosen": -388.6646423339844, "logps/rejected": -509.83148193359375, "loss": 0.4355, "rewards/accuracies": 0.875, "rewards/chosen": -0.0017329156398773193, "rewards/margins": 2.1972265243530273, "rewards/rejected": -2.1989593505859375, "step": 6451 }, { "epoch": 0.74, "learning_rate": 7.802879550509189e-08, "logits/chosen": -2.4465785026550293, "logits/rejected": -2.6925251483917236, "logps/chosen": -443.947998046875, "logps/rejected": -427.9801330566406, "loss": 0.4823, "rewards/accuracies": 0.75, "rewards/chosen": -0.20724400877952576, "rewards/margins": 1.4689677953720093, "rewards/rejected": -1.6762118339538574, "step": 6452 }, { "epoch": 0.74, "learning_rate": 7.799367903546763e-08, "logits/chosen": -2.629246234893799, "logits/rejected": -2.5227861404418945, "logps/chosen": -204.33900451660156, "logps/rejected": -199.80282592773438, "loss": 0.4519, "rewards/accuracies": 0.75, "rewards/chosen": -0.6496258974075317, "rewards/margins": 0.9693929553031921, "rewards/rejected": -1.619018793106079, "step": 6453 }, { "epoch": 0.74, "learning_rate": 7.795856256584338e-08, "logits/chosen": -1.957680106163025, "logits/rejected": -2.047872304916382, "logps/chosen": -403.48095703125, "logps/rejected": -300.4806213378906, "loss": 0.6239, "rewards/accuracies": 0.625, "rewards/chosen": -0.018184572458267212, "rewards/margins": 0.6611497402191162, "rewards/rejected": -0.6793343424797058, "step": 6454 }, { "epoch": 0.74, "learning_rate": 7.792344609621912e-08, "logits/chosen": -2.8011691570281982, "logits/rejected": -3.073173761367798, "logps/chosen": -248.10963439941406, "logps/rejected": -205.0605010986328, "loss": 0.419, "rewards/accuracies": 0.875, "rewards/chosen": 0.01224873960018158, "rewards/margins": 1.0846678018569946, "rewards/rejected": -1.0724190473556519, "step": 6455 }, { "epoch": 0.74, "learning_rate": 7.788832962659486e-08, "logits/chosen": -2.9114861488342285, "logits/rejected": -3.0614700317382812, "logps/chosen": -162.31491088867188, "logps/rejected": -206.80274963378906, "loss": 0.4839, "rewards/accuracies": 0.625, "rewards/chosen": 0.042829930782318115, "rewards/margins": 1.900216817855835, "rewards/rejected": -1.8573869466781616, "step": 6456 }, { "epoch": 0.74, "learning_rate": 7.78532131569706e-08, "logits/chosen": -3.803100109100342, "logits/rejected": -3.5004806518554688, "logps/chosen": -289.9686279296875, "logps/rejected": -228.89007568359375, "loss": 0.347, "rewards/accuracies": 0.75, "rewards/chosen": 0.5590882897377014, "rewards/margins": 1.36179518699646, "rewards/rejected": -0.8027068376541138, "step": 6457 }, { "epoch": 0.74, "learning_rate": 7.781809668734637e-08, "logits/chosen": -3.0667452812194824, "logits/rejected": -2.779268741607666, "logps/chosen": -157.91685485839844, "logps/rejected": -111.47301483154297, "loss": 0.6946, "rewards/accuracies": 0.625, "rewards/chosen": -0.3417852222919464, "rewards/margins": 0.4036257863044739, "rewards/rejected": -0.7454110383987427, "step": 6458 }, { "epoch": 0.74, "learning_rate": 7.778298021772211e-08, "logits/chosen": -2.27905011177063, "logits/rejected": -2.396794557571411, "logps/chosen": -260.6044616699219, "logps/rejected": -199.41409301757812, "loss": 0.4745, "rewards/accuracies": 0.625, "rewards/chosen": 0.09461800754070282, "rewards/margins": 1.592972755432129, "rewards/rejected": -1.4983547925949097, "step": 6459 }, { "epoch": 0.74, "learning_rate": 7.774786374809785e-08, "logits/chosen": -2.370687961578369, "logits/rejected": -2.562455892562866, "logps/chosen": -413.1358337402344, "logps/rejected": -236.24578857421875, "loss": 0.4719, "rewards/accuracies": 0.75, "rewards/chosen": 0.04253290593624115, "rewards/margins": 1.184563398361206, "rewards/rejected": -1.1420304775238037, "step": 6460 }, { "epoch": 0.74, "learning_rate": 7.77127472784736e-08, "logits/chosen": -2.976067304611206, "logits/rejected": -3.039309024810791, "logps/chosen": -188.83444213867188, "logps/rejected": -283.1610107421875, "loss": 0.3443, "rewards/accuracies": 0.875, "rewards/chosen": -0.34446537494659424, "rewards/margins": 2.617556095123291, "rewards/rejected": -2.9620213508605957, "step": 6461 }, { "epoch": 0.74, "learning_rate": 7.767763080884935e-08, "logits/chosen": -2.663421630859375, "logits/rejected": -2.738847494125366, "logps/chosen": -343.8318786621094, "logps/rejected": -344.9078063964844, "loss": 0.3352, "rewards/accuracies": 0.75, "rewards/chosen": 0.24475470185279846, "rewards/margins": 1.6283448934555054, "rewards/rejected": -1.3835902214050293, "step": 6462 }, { "epoch": 0.75, "learning_rate": 7.76425143392251e-08, "logits/chosen": -3.265627145767212, "logits/rejected": -2.7743136882781982, "logps/chosen": -167.58860778808594, "logps/rejected": -235.95474243164062, "loss": 0.6213, "rewards/accuracies": 0.625, "rewards/chosen": -0.5353987216949463, "rewards/margins": 1.347619891166687, "rewards/rejected": -1.8830187320709229, "step": 6463 }, { "epoch": 0.75, "learning_rate": 7.760739786960084e-08, "logits/chosen": -2.7575244903564453, "logits/rejected": -2.8512141704559326, "logps/chosen": -198.43621826171875, "logps/rejected": -201.7295684814453, "loss": 0.4046, "rewards/accuracies": 0.75, "rewards/chosen": 0.023220881819725037, "rewards/margins": 1.2861474752426147, "rewards/rejected": -1.262926697731018, "step": 6464 }, { "epoch": 0.75, "learning_rate": 7.757228139997658e-08, "logits/chosen": -2.938166856765747, "logits/rejected": -2.9401979446411133, "logps/chosen": -410.30169677734375, "logps/rejected": -249.59146118164062, "loss": 0.3811, "rewards/accuracies": 0.75, "rewards/chosen": 0.44342729449272156, "rewards/margins": 1.0467824935913086, "rewards/rejected": -0.6033551096916199, "step": 6465 }, { "epoch": 0.75, "learning_rate": 7.753716493035234e-08, "logits/chosen": -2.6886961460113525, "logits/rejected": -2.662539482116699, "logps/chosen": -313.06982421875, "logps/rejected": -261.8187255859375, "loss": 0.5557, "rewards/accuracies": 0.625, "rewards/chosen": -0.7038354873657227, "rewards/margins": 1.35688054561615, "rewards/rejected": -2.060716152191162, "step": 6466 }, { "epoch": 0.75, "learning_rate": 7.750204846072808e-08, "logits/chosen": -3.495527744293213, "logits/rejected": -3.6539969444274902, "logps/chosen": -125.57868957519531, "logps/rejected": -165.42938232421875, "loss": 0.3294, "rewards/accuracies": 0.75, "rewards/chosen": -0.12933042645454407, "rewards/margins": 1.6982178688049316, "rewards/rejected": -1.8275482654571533, "step": 6467 }, { "epoch": 0.75, "learning_rate": 7.746693199110382e-08, "logits/chosen": -2.175307035446167, "logits/rejected": -2.2903549671173096, "logps/chosen": -273.85369873046875, "logps/rejected": -317.2001037597656, "loss": 0.3841, "rewards/accuracies": 0.75, "rewards/chosen": 0.3220002055168152, "rewards/margins": 1.8508973121643066, "rewards/rejected": -1.5288970470428467, "step": 6468 }, { "epoch": 0.75, "learning_rate": 7.743181552147957e-08, "logits/chosen": -3.516238212585449, "logits/rejected": -3.28208065032959, "logps/chosen": -338.69451904296875, "logps/rejected": -224.90167236328125, "loss": 0.3856, "rewards/accuracies": 0.875, "rewards/chosen": -0.19431215524673462, "rewards/margins": 1.2812305688858032, "rewards/rejected": -1.475542664527893, "step": 6469 }, { "epoch": 0.75, "learning_rate": 7.739669905185533e-08, "logits/chosen": -3.49448823928833, "logits/rejected": -3.4101572036743164, "logps/chosen": -317.0505676269531, "logps/rejected": -309.2614440917969, "loss": 0.307, "rewards/accuracies": 0.875, "rewards/chosen": 0.24605783820152283, "rewards/margins": 3.482137441635132, "rewards/rejected": -3.236079692840576, "step": 6470 }, { "epoch": 0.75, "learning_rate": 7.736158258223107e-08, "logits/chosen": -3.598710060119629, "logits/rejected": -3.518826723098755, "logps/chosen": -155.84597778320312, "logps/rejected": -220.13134765625, "loss": 0.4369, "rewards/accuracies": 0.875, "rewards/chosen": 0.2908773422241211, "rewards/margins": 0.7860502004623413, "rewards/rejected": -0.495172917842865, "step": 6471 }, { "epoch": 0.75, "learning_rate": 7.732646611260681e-08, "logits/chosen": -2.9399354457855225, "logits/rejected": -2.9047601222991943, "logps/chosen": -146.7411651611328, "logps/rejected": -236.7665557861328, "loss": 0.6512, "rewards/accuracies": 0.625, "rewards/chosen": -0.6469926834106445, "rewards/margins": 0.5870485305786133, "rewards/rejected": -1.2340412139892578, "step": 6472 }, { "epoch": 0.75, "learning_rate": 7.729134964298255e-08, "logits/chosen": -2.8062145709991455, "logits/rejected": -2.9400105476379395, "logps/chosen": -387.98846435546875, "logps/rejected": -378.82940673828125, "loss": 0.4501, "rewards/accuracies": 0.75, "rewards/chosen": -0.2947269678115845, "rewards/margins": 1.979923129081726, "rewards/rejected": -2.2746500968933105, "step": 6473 }, { "epoch": 0.75, "learning_rate": 7.725623317335831e-08, "logits/chosen": -2.7872681617736816, "logits/rejected": -2.628687858581543, "logps/chosen": -215.11219787597656, "logps/rejected": -183.9815673828125, "loss": 0.8189, "rewards/accuracies": 0.5, "rewards/chosen": -0.4634980261325836, "rewards/margins": 0.2766202688217163, "rewards/rejected": -0.7401183843612671, "step": 6474 }, { "epoch": 0.75, "learning_rate": 7.722111670373405e-08, "logits/chosen": -2.7798078060150146, "logits/rejected": -2.8525900840759277, "logps/chosen": -261.1553649902344, "logps/rejected": -85.6917953491211, "loss": 0.5672, "rewards/accuracies": 0.5, "rewards/chosen": -0.3356441855430603, "rewards/margins": 0.5613486766815186, "rewards/rejected": -0.8969928026199341, "step": 6475 }, { "epoch": 0.75, "learning_rate": 7.71860002341098e-08, "logits/chosen": -3.0236315727233887, "logits/rejected": -3.264103651046753, "logps/chosen": -310.6517333984375, "logps/rejected": -256.31103515625, "loss": 0.1968, "rewards/accuracies": 0.875, "rewards/chosen": 0.04125623404979706, "rewards/margins": 3.073268413543701, "rewards/rejected": -3.0320119857788086, "step": 6476 }, { "epoch": 0.75, "learning_rate": 7.715088376448554e-08, "logits/chosen": -3.1249635219573975, "logits/rejected": -3.0256919860839844, "logps/chosen": -243.515380859375, "logps/rejected": -299.82598876953125, "loss": 0.6058, "rewards/accuracies": 0.75, "rewards/chosen": 0.3581535220146179, "rewards/margins": 1.5307197570800781, "rewards/rejected": -1.1725661754608154, "step": 6477 }, { "epoch": 0.75, "learning_rate": 7.711576729486129e-08, "logits/chosen": -2.7523279190063477, "logits/rejected": -3.046518564224243, "logps/chosen": -236.1796875, "logps/rejected": -226.11782836914062, "loss": 0.3512, "rewards/accuracies": 0.75, "rewards/chosen": -0.22763609886169434, "rewards/margins": 1.9732410907745361, "rewards/rejected": -2.2008771896362305, "step": 6478 }, { "epoch": 0.75, "learning_rate": 7.708065082523703e-08, "logits/chosen": -3.1514666080474854, "logits/rejected": -3.377070426940918, "logps/chosen": -254.89974975585938, "logps/rejected": -245.13015747070312, "loss": 0.4774, "rewards/accuracies": 0.75, "rewards/chosen": -0.4769400358200073, "rewards/margins": 1.8159513473510742, "rewards/rejected": -2.292891502380371, "step": 6479 }, { "epoch": 0.75, "learning_rate": 7.704553435561278e-08, "logits/chosen": -2.430668830871582, "logits/rejected": -2.4704012870788574, "logps/chosen": -369.027587890625, "logps/rejected": -246.21453857421875, "loss": 0.4602, "rewards/accuracies": 0.75, "rewards/chosen": 0.01004992425441742, "rewards/margins": 2.0527431964874268, "rewards/rejected": -2.0426933765411377, "step": 6480 }, { "epoch": 0.75, "learning_rate": 7.701041788598853e-08, "logits/chosen": -2.7770090103149414, "logits/rejected": -2.5841822624206543, "logps/chosen": -407.4916076660156, "logps/rejected": -360.21533203125, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": 0.07695138454437256, "rewards/margins": 2.216918706893921, "rewards/rejected": -2.139967203140259, "step": 6481 }, { "epoch": 0.75, "learning_rate": 7.697530141636427e-08, "logits/chosen": -2.646050214767456, "logits/rejected": -2.833428382873535, "logps/chosen": -100.26174926757812, "logps/rejected": -241.74966430664062, "loss": 0.1991, "rewards/accuracies": 1.0, "rewards/chosen": 0.22922126948833466, "rewards/margins": 2.1988589763641357, "rewards/rejected": -1.9696377515792847, "step": 6482 }, { "epoch": 0.75, "learning_rate": 7.694018494674002e-08, "logits/chosen": -3.226240634918213, "logits/rejected": -3.4811646938323975, "logps/chosen": -91.27635192871094, "logps/rejected": -236.05487060546875, "loss": 0.2894, "rewards/accuracies": 0.875, "rewards/chosen": 0.14726081490516663, "rewards/margins": 2.4725241661071777, "rewards/rejected": -2.325263023376465, "step": 6483 }, { "epoch": 0.75, "learning_rate": 7.690506847711576e-08, "logits/chosen": -3.153597593307495, "logits/rejected": -3.48480486869812, "logps/chosen": -135.2821044921875, "logps/rejected": -205.56961059570312, "loss": 0.5563, "rewards/accuracies": 0.625, "rewards/chosen": 0.06084933876991272, "rewards/margins": 1.118162751197815, "rewards/rejected": -1.057313323020935, "step": 6484 }, { "epoch": 0.75, "learning_rate": 7.68699520074915e-08, "logits/chosen": -3.8122591972351074, "logits/rejected": -3.4576544761657715, "logps/chosen": -229.5215301513672, "logps/rejected": -236.5208282470703, "loss": 0.2303, "rewards/accuracies": 0.875, "rewards/chosen": 0.5939233899116516, "rewards/margins": 2.7691822052001953, "rewards/rejected": -2.1752588748931885, "step": 6485 }, { "epoch": 0.75, "learning_rate": 7.683483553786725e-08, "logits/chosen": -2.7706494331359863, "logits/rejected": -2.802107095718384, "logps/chosen": -139.47958374023438, "logps/rejected": -203.73521423339844, "loss": 0.3474, "rewards/accuracies": 0.875, "rewards/chosen": 0.4977773129940033, "rewards/margins": 1.4175978899002075, "rewards/rejected": -0.9198206067085266, "step": 6486 }, { "epoch": 0.75, "learning_rate": 7.679971906824301e-08, "logits/chosen": -2.6499171257019043, "logits/rejected": -2.5231680870056152, "logps/chosen": -394.2127685546875, "logps/rejected": -271.6762390136719, "loss": 0.2705, "rewards/accuracies": 1.0, "rewards/chosen": 0.018754959106445312, "rewards/margins": 1.9008960723876953, "rewards/rejected": -1.8821409940719604, "step": 6487 }, { "epoch": 0.75, "learning_rate": 7.676460259861875e-08, "logits/chosen": -2.912457227706909, "logits/rejected": -2.9687581062316895, "logps/chosen": -466.8310241699219, "logps/rejected": -396.41912841796875, "loss": 0.3558, "rewards/accuracies": 0.75, "rewards/chosen": 0.304269015789032, "rewards/margins": 1.706907033920288, "rewards/rejected": -1.4026378393173218, "step": 6488 }, { "epoch": 0.75, "learning_rate": 7.672948612899449e-08, "logits/chosen": -3.2861125469207764, "logits/rejected": -3.310567855834961, "logps/chosen": -323.02691650390625, "logps/rejected": -328.266357421875, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362614929676056, "rewards/margins": 3.3413617610931396, "rewards/rejected": -3.477623224258423, "step": 6489 }, { "epoch": 0.75, "learning_rate": 7.669436965937023e-08, "logits/chosen": -3.835117816925049, "logits/rejected": -3.4782800674438477, "logps/chosen": -256.03033447265625, "logps/rejected": -191.8092803955078, "loss": 0.2582, "rewards/accuracies": 1.0, "rewards/chosen": 0.17934419214725494, "rewards/margins": 1.3111696243286133, "rewards/rejected": -1.1318254470825195, "step": 6490 }, { "epoch": 0.75, "learning_rate": 7.6659253189746e-08, "logits/chosen": -2.8962535858154297, "logits/rejected": -2.8031957149505615, "logps/chosen": -287.2691955566406, "logps/rejected": -274.0860900878906, "loss": 0.7863, "rewards/accuracies": 0.625, "rewards/chosen": -0.09967190027236938, "rewards/margins": 0.35114896297454834, "rewards/rejected": -0.4508208930492401, "step": 6491 }, { "epoch": 0.75, "learning_rate": 7.662413672012174e-08, "logits/chosen": -2.1327548027038574, "logits/rejected": -2.3547451496124268, "logps/chosen": -432.81805419921875, "logps/rejected": -262.3018798828125, "loss": 0.2556, "rewards/accuracies": 0.875, "rewards/chosen": 0.015278682112693787, "rewards/margins": 1.9702988862991333, "rewards/rejected": -1.9550203084945679, "step": 6492 }, { "epoch": 0.75, "learning_rate": 7.658902025049748e-08, "logits/chosen": -3.563631057739258, "logits/rejected": -3.6595945358276367, "logps/chosen": -385.14483642578125, "logps/rejected": -184.54776000976562, "loss": 0.5147, "rewards/accuracies": 0.875, "rewards/chosen": -0.42551517486572266, "rewards/margins": 1.3504798412322998, "rewards/rejected": -1.7759950160980225, "step": 6493 }, { "epoch": 0.75, "learning_rate": 7.655390378087322e-08, "logits/chosen": -3.4432175159454346, "logits/rejected": -3.4605228900909424, "logps/chosen": -241.50985717773438, "logps/rejected": -262.1628112792969, "loss": 0.3358, "rewards/accuracies": 0.625, "rewards/chosen": -0.05820727348327637, "rewards/margins": 2.607933282852173, "rewards/rejected": -2.6661407947540283, "step": 6494 }, { "epoch": 0.75, "learning_rate": 7.651878731124897e-08, "logits/chosen": -3.345986843109131, "logits/rejected": -3.1482748985290527, "logps/chosen": -154.74949645996094, "logps/rejected": -147.97879028320312, "loss": 0.4647, "rewards/accuracies": 0.75, "rewards/chosen": -0.47857144474983215, "rewards/margins": 1.7878497838974, "rewards/rejected": -2.266421318054199, "step": 6495 }, { "epoch": 0.75, "learning_rate": 7.648367084162471e-08, "logits/chosen": -2.862565755844116, "logits/rejected": -3.1387453079223633, "logps/chosen": -285.0855712890625, "logps/rejected": -311.04852294921875, "loss": 0.7567, "rewards/accuracies": 0.625, "rewards/chosen": -0.29181766510009766, "rewards/margins": 0.827465832233429, "rewards/rejected": -1.1192835569381714, "step": 6496 }, { "epoch": 0.75, "learning_rate": 7.644855437200047e-08, "logits/chosen": -2.906141757965088, "logits/rejected": -3.2622570991516113, "logps/chosen": -245.92922973632812, "logps/rejected": -175.70713806152344, "loss": 0.3614, "rewards/accuracies": 0.875, "rewards/chosen": -0.09336479008197784, "rewards/margins": 1.1728240251541138, "rewards/rejected": -1.2661888599395752, "step": 6497 }, { "epoch": 0.75, "learning_rate": 7.641343790237621e-08, "logits/chosen": -2.4805121421813965, "logits/rejected": -2.6254630088806152, "logps/chosen": -388.48260498046875, "logps/rejected": -259.4971618652344, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": 0.44157448410987854, "rewards/margins": 2.603321075439453, "rewards/rejected": -2.1617465019226074, "step": 6498 }, { "epoch": 0.75, "learning_rate": 7.637832143275196e-08, "logits/chosen": -3.412341356277466, "logits/rejected": -3.3840365409851074, "logps/chosen": -231.84307861328125, "logps/rejected": -217.4248046875, "loss": 0.2935, "rewards/accuracies": 0.875, "rewards/chosen": -0.06419935822486877, "rewards/margins": 2.5246706008911133, "rewards/rejected": -2.58886981010437, "step": 6499 }, { "epoch": 0.75, "learning_rate": 7.63432049631277e-08, "logits/chosen": -2.933264970779419, "logits/rejected": -3.0499138832092285, "logps/chosen": -141.75442504882812, "logps/rejected": -246.0009307861328, "loss": 0.3028, "rewards/accuracies": 0.875, "rewards/chosen": -0.23343193531036377, "rewards/margins": 3.0776004791259766, "rewards/rejected": -3.311032772064209, "step": 6500 }, { "epoch": 0.75, "learning_rate": 7.630808849350344e-08, "logits/chosen": -2.55734920501709, "logits/rejected": -2.356348991394043, "logps/chosen": -227.35995483398438, "logps/rejected": -170.04684448242188, "loss": 0.3116, "rewards/accuracies": 1.0, "rewards/chosen": 0.11066178977489471, "rewards/margins": 2.1427924633026123, "rewards/rejected": -2.032130718231201, "step": 6501 }, { "epoch": 0.75, "learning_rate": 7.627297202387918e-08, "logits/chosen": -3.721618175506592, "logits/rejected": -3.640932083129883, "logps/chosen": -220.23220825195312, "logps/rejected": -271.0509033203125, "loss": 1.3128, "rewards/accuracies": 0.375, "rewards/chosen": -1.3311026096343994, "rewards/margins": 0.12671315670013428, "rewards/rejected": -1.4578156471252441, "step": 6502 }, { "epoch": 0.75, "learning_rate": 7.623785555425495e-08, "logits/chosen": -3.321223735809326, "logits/rejected": -3.2864201068878174, "logps/chosen": -290.44427490234375, "logps/rejected": -283.0976257324219, "loss": 0.2498, "rewards/accuracies": 1.0, "rewards/chosen": 0.2212362289428711, "rewards/margins": 2.2553791999816895, "rewards/rejected": -2.0341432094573975, "step": 6503 }, { "epoch": 0.75, "learning_rate": 7.620273908463069e-08, "logits/chosen": -2.6551616191864014, "logits/rejected": -2.6672258377075195, "logps/chosen": -212.0513916015625, "logps/rejected": -185.9377899169922, "loss": 0.3492, "rewards/accuracies": 1.0, "rewards/chosen": -0.03805989772081375, "rewards/margins": 1.279830813407898, "rewards/rejected": -1.3178906440734863, "step": 6504 }, { "epoch": 0.75, "learning_rate": 7.616762261500643e-08, "logits/chosen": -3.584686279296875, "logits/rejected": -3.767699718475342, "logps/chosen": -104.268310546875, "logps/rejected": -176.30438232421875, "loss": 0.236, "rewards/accuracies": 0.875, "rewards/chosen": 1.3064998388290405, "rewards/margins": 2.479572296142578, "rewards/rejected": -1.1730725765228271, "step": 6505 }, { "epoch": 0.75, "learning_rate": 7.613250614538217e-08, "logits/chosen": -3.2867660522460938, "logits/rejected": -3.0920982360839844, "logps/chosen": -321.56304931640625, "logps/rejected": -360.7660217285156, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 0.34653210639953613, "rewards/margins": 5.007863521575928, "rewards/rejected": -4.661332130432129, "step": 6506 }, { "epoch": 0.75, "learning_rate": 7.609738967575794e-08, "logits/chosen": -2.9876575469970703, "logits/rejected": -3.019923210144043, "logps/chosen": -373.105712890625, "logps/rejected": -399.05523681640625, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": -0.33076804876327515, "rewards/margins": 2.3888180255889893, "rewards/rejected": -2.71958589553833, "step": 6507 }, { "epoch": 0.75, "learning_rate": 7.606227320613368e-08, "logits/chosen": -2.9815986156463623, "logits/rejected": -3.1906673908233643, "logps/chosen": -122.9112777709961, "logps/rejected": -163.3248748779297, "loss": 0.442, "rewards/accuracies": 0.625, "rewards/chosen": 0.018664300441741943, "rewards/margins": 1.7209234237670898, "rewards/rejected": -1.7022590637207031, "step": 6508 }, { "epoch": 0.75, "learning_rate": 7.602715673650942e-08, "logits/chosen": -3.793114423751831, "logits/rejected": -3.602126121520996, "logps/chosen": -277.98248291015625, "logps/rejected": -254.28778076171875, "loss": 0.2329, "rewards/accuracies": 1.0, "rewards/chosen": 0.3082159161567688, "rewards/margins": 2.5891823768615723, "rewards/rejected": -2.2809667587280273, "step": 6509 }, { "epoch": 0.75, "learning_rate": 7.599204026688516e-08, "logits/chosen": -3.323521137237549, "logits/rejected": -3.162421464920044, "logps/chosen": -191.2319793701172, "logps/rejected": -232.73318481445312, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -0.6039155721664429, "rewards/margins": 2.6276028156280518, "rewards/rejected": -3.2315187454223633, "step": 6510 }, { "epoch": 0.75, "learning_rate": 7.595692379726092e-08, "logits/chosen": -2.9219470024108887, "logits/rejected": -3.052826404571533, "logps/chosen": -95.91213989257812, "logps/rejected": -103.5226058959961, "loss": 0.3907, "rewards/accuracies": 0.875, "rewards/chosen": -0.15509366989135742, "rewards/margins": 1.2208197116851807, "rewards/rejected": -1.3759132623672485, "step": 6511 }, { "epoch": 0.75, "learning_rate": 7.592180732763666e-08, "logits/chosen": -2.7765085697174072, "logits/rejected": -2.6297311782836914, "logps/chosen": -255.2647705078125, "logps/rejected": -187.45362854003906, "loss": 0.5367, "rewards/accuracies": 0.75, "rewards/chosen": -0.5771779417991638, "rewards/margins": 1.3416435718536377, "rewards/rejected": -1.9188215732574463, "step": 6512 }, { "epoch": 0.75, "learning_rate": 7.58866908580124e-08, "logits/chosen": -2.9897921085357666, "logits/rejected": -3.4109253883361816, "logps/chosen": -216.80075073242188, "logps/rejected": -204.0928497314453, "loss": 0.3939, "rewards/accuracies": 0.75, "rewards/chosen": 0.115714892745018, "rewards/margins": 1.4345275163650513, "rewards/rejected": -1.3188127279281616, "step": 6513 }, { "epoch": 0.75, "learning_rate": 7.585157438838815e-08, "logits/chosen": -3.4047112464904785, "logits/rejected": -3.05232572555542, "logps/chosen": -345.5143127441406, "logps/rejected": -203.15670776367188, "loss": 0.3518, "rewards/accuracies": 0.875, "rewards/chosen": 0.41347840428352356, "rewards/margins": 1.628831148147583, "rewards/rejected": -1.2153527736663818, "step": 6514 }, { "epoch": 0.75, "learning_rate": 7.58164579187639e-08, "logits/chosen": -3.5607805252075195, "logits/rejected": -3.3864588737487793, "logps/chosen": -192.48434448242188, "logps/rejected": -246.35940551757812, "loss": 0.2178, "rewards/accuracies": 0.875, "rewards/chosen": 0.5536952614784241, "rewards/margins": 2.3185577392578125, "rewards/rejected": -1.7648625373840332, "step": 6515 }, { "epoch": 0.75, "learning_rate": 7.578134144913965e-08, "logits/chosen": -3.5859196186065674, "logits/rejected": -3.5124669075012207, "logps/chosen": -150.69947814941406, "logps/rejected": -137.43556213378906, "loss": 0.4847, "rewards/accuracies": 0.625, "rewards/chosen": 0.3370472490787506, "rewards/margins": 0.8788605332374573, "rewards/rejected": -0.5418132543563843, "step": 6516 }, { "epoch": 0.75, "learning_rate": 7.574622497951539e-08, "logits/chosen": -3.533740997314453, "logits/rejected": -3.088996410369873, "logps/chosen": -184.53836059570312, "logps/rejected": -232.36146545410156, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.30797645449638367, "rewards/margins": 1.9857368469238281, "rewards/rejected": -1.677760362625122, "step": 6517 }, { "epoch": 0.75, "learning_rate": 7.571110850989113e-08, "logits/chosen": -2.908053398132324, "logits/rejected": -2.9372589588165283, "logps/chosen": -226.55169677734375, "logps/rejected": -289.9132080078125, "loss": 0.2553, "rewards/accuracies": 1.0, "rewards/chosen": 0.26379287242889404, "rewards/margins": 2.493405342102051, "rewards/rejected": -2.2296125888824463, "step": 6518 }, { "epoch": 0.75, "learning_rate": 7.56759920402669e-08, "logits/chosen": -3.2346673011779785, "logits/rejected": -3.3661887645721436, "logps/chosen": -333.4208068847656, "logps/rejected": -227.08041381835938, "loss": 0.4064, "rewards/accuracies": 0.75, "rewards/chosen": 0.04627283662557602, "rewards/margins": 1.468430757522583, "rewards/rejected": -1.422157883644104, "step": 6519 }, { "epoch": 0.75, "learning_rate": 7.564087557064263e-08, "logits/chosen": -3.1157736778259277, "logits/rejected": -3.807528495788574, "logps/chosen": -191.6927947998047, "logps/rejected": -255.67822265625, "loss": 0.3196, "rewards/accuracies": 0.75, "rewards/chosen": 0.1446167230606079, "rewards/margins": 2.9586405754089355, "rewards/rejected": -2.814023971557617, "step": 6520 }, { "epoch": 0.75, "learning_rate": 7.560575910101837e-08, "logits/chosen": -3.30830979347229, "logits/rejected": -2.6087608337402344, "logps/chosen": -409.8335266113281, "logps/rejected": -282.00921630859375, "loss": 0.1598, "rewards/accuracies": 0.875, "rewards/chosen": 0.26857730746269226, "rewards/margins": 2.9933900833129883, "rewards/rejected": -2.7248125076293945, "step": 6521 }, { "epoch": 0.75, "learning_rate": 7.557064263139412e-08, "logits/chosen": -3.2779014110565186, "logits/rejected": -3.1872267723083496, "logps/chosen": -112.30634307861328, "logps/rejected": -215.7979278564453, "loss": 0.3798, "rewards/accuracies": 0.875, "rewards/chosen": -0.2633770704269409, "rewards/margins": 1.795915961265564, "rewards/rejected": -2.059292793273926, "step": 6522 }, { "epoch": 0.75, "learning_rate": 7.553552616176987e-08, "logits/chosen": -2.8517587184906006, "logits/rejected": -3.4699535369873047, "logps/chosen": -198.90774536132812, "logps/rejected": -323.7243347167969, "loss": 0.4929, "rewards/accuracies": 0.75, "rewards/chosen": -0.5169811844825745, "rewards/margins": 1.3740003108978271, "rewards/rejected": -1.890981674194336, "step": 6523 }, { "epoch": 0.75, "learning_rate": 7.550040969214562e-08, "logits/chosen": -3.440267324447632, "logits/rejected": -3.6587626934051514, "logps/chosen": -291.5665283203125, "logps/rejected": -326.76947021484375, "loss": 0.4492, "rewards/accuracies": 0.75, "rewards/chosen": -0.11885866522789001, "rewards/margins": 2.3523738384246826, "rewards/rejected": -2.4712321758270264, "step": 6524 }, { "epoch": 0.75, "learning_rate": 7.546529322252136e-08, "logits/chosen": -3.2368786334991455, "logits/rejected": -3.089158773422241, "logps/chosen": -404.9744873046875, "logps/rejected": -363.9091796875, "loss": 0.1245, "rewards/accuracies": 1.0, "rewards/chosen": 0.3825230002403259, "rewards/margins": 3.537560224533081, "rewards/rejected": -3.1550374031066895, "step": 6525 }, { "epoch": 0.75, "learning_rate": 7.54301767528971e-08, "logits/chosen": -3.444892644882202, "logits/rejected": -3.101372718811035, "logps/chosen": -278.055908203125, "logps/rejected": -326.1573486328125, "loss": 0.6762, "rewards/accuracies": 0.75, "rewards/chosen": -0.5893330574035645, "rewards/margins": 0.5603678226470947, "rewards/rejected": -1.1497007608413696, "step": 6526 }, { "epoch": 0.75, "learning_rate": 7.539506028327285e-08, "logits/chosen": -3.052907943725586, "logits/rejected": -2.8268730640411377, "logps/chosen": -278.0086975097656, "logps/rejected": -356.0139465332031, "loss": 0.2018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4018939137458801, "rewards/margins": 2.136855125427246, "rewards/rejected": -2.5387492179870605, "step": 6527 }, { "epoch": 0.75, "learning_rate": 7.53599438136486e-08, "logits/chosen": -3.8642566204071045, "logits/rejected": -3.7533626556396484, "logps/chosen": -132.16558837890625, "logps/rejected": -190.14651489257812, "loss": 0.2779, "rewards/accuracies": 0.75, "rewards/chosen": -0.3817184269428253, "rewards/margins": 2.039013147354126, "rewards/rejected": -2.420731544494629, "step": 6528 }, { "epoch": 0.75, "learning_rate": 7.532482734402434e-08, "logits/chosen": -3.681105852127075, "logits/rejected": -3.413938045501709, "logps/chosen": -161.257080078125, "logps/rejected": -190.24114990234375, "loss": 0.6837, "rewards/accuracies": 0.625, "rewards/chosen": -1.0057098865509033, "rewards/margins": 1.7423654794692993, "rewards/rejected": -2.748075485229492, "step": 6529 }, { "epoch": 0.75, "learning_rate": 7.528971087440008e-08, "logits/chosen": -3.1546051502227783, "logits/rejected": -3.4408740997314453, "logps/chosen": -375.99066162109375, "logps/rejected": -393.6514587402344, "loss": 0.3022, "rewards/accuracies": 0.875, "rewards/chosen": -0.04169077426195145, "rewards/margins": 2.201643943786621, "rewards/rejected": -2.2433345317840576, "step": 6530 }, { "epoch": 0.75, "learning_rate": 7.525459440477583e-08, "logits/chosen": -3.6739842891693115, "logits/rejected": -3.5017249584198, "logps/chosen": -387.4170837402344, "logps/rejected": -368.8540954589844, "loss": 0.4688, "rewards/accuracies": 0.625, "rewards/chosen": -0.4473057985305786, "rewards/margins": 1.2038755416870117, "rewards/rejected": -1.6511814594268799, "step": 6531 }, { "epoch": 0.75, "learning_rate": 7.521947793515159e-08, "logits/chosen": -3.5818443298339844, "logits/rejected": -3.6348114013671875, "logps/chosen": -291.43756103515625, "logps/rejected": -295.6882019042969, "loss": 0.4599, "rewards/accuracies": 0.75, "rewards/chosen": -0.26185524463653564, "rewards/margins": 1.2087764739990234, "rewards/rejected": -1.4706318378448486, "step": 6532 }, { "epoch": 0.75, "learning_rate": 7.518436146552733e-08, "logits/chosen": -3.0696208477020264, "logits/rejected": -2.701815128326416, "logps/chosen": -391.3270568847656, "logps/rejected": -257.32366943359375, "loss": 0.3493, "rewards/accuracies": 0.875, "rewards/chosen": -0.1496993899345398, "rewards/margins": 1.5526443719863892, "rewards/rejected": -1.7023438215255737, "step": 6533 }, { "epoch": 0.75, "learning_rate": 7.514924499590307e-08, "logits/chosen": -3.4806714057922363, "logits/rejected": -3.2448201179504395, "logps/chosen": -282.55810546875, "logps/rejected": -318.3855285644531, "loss": 0.4413, "rewards/accuracies": 0.75, "rewards/chosen": 0.25659283995628357, "rewards/margins": 1.9953383207321167, "rewards/rejected": -1.7387455701828003, "step": 6534 }, { "epoch": 0.75, "learning_rate": 7.511412852627881e-08, "logits/chosen": -2.794063091278076, "logits/rejected": -2.7325100898742676, "logps/chosen": -284.23211669921875, "logps/rejected": -277.35882568359375, "loss": 0.4252, "rewards/accuracies": 0.875, "rewards/chosen": -0.3852311968803406, "rewards/margins": 0.9680879712104797, "rewards/rejected": -1.3533190488815308, "step": 6535 }, { "epoch": 0.75, "learning_rate": 7.507901205665458e-08, "logits/chosen": -2.3681752681732178, "logits/rejected": -2.484633445739746, "logps/chosen": -477.0494689941406, "logps/rejected": -317.46539306640625, "loss": 0.3556, "rewards/accuracies": 0.875, "rewards/chosen": -0.0661531463265419, "rewards/margins": 1.3548974990844727, "rewards/rejected": -1.421050786972046, "step": 6536 }, { "epoch": 0.75, "learning_rate": 7.504389558703032e-08, "logits/chosen": -3.001845359802246, "logits/rejected": -3.1923954486846924, "logps/chosen": -209.1826171875, "logps/rejected": -233.042724609375, "loss": 0.3112, "rewards/accuracies": 0.875, "rewards/chosen": 0.28428834676742554, "rewards/margins": 1.8297929763793945, "rewards/rejected": -1.5455045700073242, "step": 6537 }, { "epoch": 0.75, "learning_rate": 7.500877911740606e-08, "logits/chosen": -2.6471452713012695, "logits/rejected": -2.7673840522766113, "logps/chosen": -351.2968444824219, "logps/rejected": -392.42919921875, "loss": 0.2806, "rewards/accuracies": 0.875, "rewards/chosen": -0.2282203882932663, "rewards/margins": 1.928169846534729, "rewards/rejected": -2.1563901901245117, "step": 6538 }, { "epoch": 0.75, "learning_rate": 7.497366264778181e-08, "logits/chosen": -3.1957271099090576, "logits/rejected": -2.5698862075805664, "logps/chosen": -314.2757873535156, "logps/rejected": -216.6915283203125, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": 0.2935863733291626, "rewards/margins": 2.604506015777588, "rewards/rejected": -2.3109195232391357, "step": 6539 }, { "epoch": 0.75, "learning_rate": 7.493854617815755e-08, "logits/chosen": -3.0691072940826416, "logits/rejected": -3.3866875171661377, "logps/chosen": -184.68789672851562, "logps/rejected": -204.42652893066406, "loss": 0.1982, "rewards/accuracies": 1.0, "rewards/chosen": 0.29127073287963867, "rewards/margins": 3.587167501449585, "rewards/rejected": -3.295896530151367, "step": 6540 }, { "epoch": 0.75, "learning_rate": 7.49034297085333e-08, "logits/chosen": -3.2182934284210205, "logits/rejected": -3.0686349868774414, "logps/chosen": -361.7158203125, "logps/rejected": -281.7326965332031, "loss": 0.1886, "rewards/accuracies": 1.0, "rewards/chosen": -0.1662176251411438, "rewards/margins": 2.864023208618164, "rewards/rejected": -3.030241012573242, "step": 6541 }, { "epoch": 0.75, "learning_rate": 7.486831323890905e-08, "logits/chosen": -3.263988494873047, "logits/rejected": -3.339576482772827, "logps/chosen": -356.34869384765625, "logps/rejected": -208.26235961914062, "loss": 0.3911, "rewards/accuracies": 0.625, "rewards/chosen": -0.43283194303512573, "rewards/margins": 1.680727243423462, "rewards/rejected": -2.1135592460632324, "step": 6542 }, { "epoch": 0.75, "learning_rate": 7.483319676928479e-08, "logits/chosen": -3.0811853408813477, "logits/rejected": -2.7996299266815186, "logps/chosen": -325.34234619140625, "logps/rejected": -204.7603759765625, "loss": 0.3678, "rewards/accuracies": 1.0, "rewards/chosen": -0.027760900557041168, "rewards/margins": 1.0378828048706055, "rewards/rejected": -1.0656436681747437, "step": 6543 }, { "epoch": 0.75, "learning_rate": 7.479808029966053e-08, "logits/chosen": -3.185659885406494, "logits/rejected": -3.025155544281006, "logps/chosen": -376.037353515625, "logps/rejected": -253.80215454101562, "loss": 0.4801, "rewards/accuracies": 0.875, "rewards/chosen": -0.18034932017326355, "rewards/margins": 0.9969501495361328, "rewards/rejected": -1.1772993803024292, "step": 6544 }, { "epoch": 0.75, "learning_rate": 7.476296383003628e-08, "logits/chosen": -3.1137399673461914, "logits/rejected": -3.4895453453063965, "logps/chosen": -131.1956024169922, "logps/rejected": -309.62750244140625, "loss": 0.2147, "rewards/accuracies": 1.0, "rewards/chosen": -0.1104879379272461, "rewards/margins": 2.5583739280700684, "rewards/rejected": -2.6688621044158936, "step": 6545 }, { "epoch": 0.75, "learning_rate": 7.472784736041202e-08, "logits/chosen": -2.944122076034546, "logits/rejected": -2.8698127269744873, "logps/chosen": -209.2655487060547, "logps/rejected": -254.73280334472656, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": 0.4455290138721466, "rewards/margins": 1.7793720960617065, "rewards/rejected": -1.3338429927825928, "step": 6546 }, { "epoch": 0.75, "learning_rate": 7.469273089078778e-08, "logits/chosen": -3.407456874847412, "logits/rejected": -3.568614959716797, "logps/chosen": -121.22969055175781, "logps/rejected": -140.32835388183594, "loss": 0.4275, "rewards/accuracies": 0.75, "rewards/chosen": -0.2390754222869873, "rewards/margins": 1.1134474277496338, "rewards/rejected": -1.3525227308273315, "step": 6547 }, { "epoch": 0.75, "learning_rate": 7.465761442116352e-08, "logits/chosen": -2.6996140480041504, "logits/rejected": -2.9075300693511963, "logps/chosen": -273.2840881347656, "logps/rejected": -247.98765563964844, "loss": 0.5271, "rewards/accuracies": 0.75, "rewards/chosen": -0.15210485458374023, "rewards/margins": 0.7527235746383667, "rewards/rejected": -0.9048283696174622, "step": 6548 }, { "epoch": 0.75, "learning_rate": 7.462249795153927e-08, "logits/chosen": -3.72564697265625, "logits/rejected": -3.3327300548553467, "logps/chosen": -172.9583740234375, "logps/rejected": -177.21322631835938, "loss": 0.4148, "rewards/accuracies": 0.75, "rewards/chosen": -0.21348682045936584, "rewards/margins": 1.2866634130477905, "rewards/rejected": -1.500150203704834, "step": 6549 }, { "epoch": 0.76, "learning_rate": 7.458738148191501e-08, "logits/chosen": -2.6583213806152344, "logits/rejected": -2.983236789703369, "logps/chosen": -224.46682739257812, "logps/rejected": -317.26763916015625, "loss": 0.4317, "rewards/accuracies": 0.875, "rewards/chosen": 0.05847051739692688, "rewards/margins": 0.8639366626739502, "rewards/rejected": -0.8054660558700562, "step": 6550 }, { "epoch": 0.76, "learning_rate": 7.455226501229077e-08, "logits/chosen": -3.823631525039673, "logits/rejected": -3.8988664150238037, "logps/chosen": -157.77517700195312, "logps/rejected": -160.58628845214844, "loss": 0.1956, "rewards/accuracies": 0.875, "rewards/chosen": 0.7096014022827148, "rewards/margins": 2.6385297775268555, "rewards/rejected": -1.9289283752441406, "step": 6551 }, { "epoch": 0.76, "learning_rate": 7.45171485426665e-08, "logits/chosen": -2.4724228382110596, "logits/rejected": -2.536151885986328, "logps/chosen": -271.87908935546875, "logps/rejected": -237.53294372558594, "loss": 0.2861, "rewards/accuracies": 0.875, "rewards/chosen": -0.303237646818161, "rewards/margins": 1.7927935123443604, "rewards/rejected": -2.0960309505462646, "step": 6552 }, { "epoch": 0.76, "learning_rate": 7.448203207304226e-08, "logits/chosen": -2.82582426071167, "logits/rejected": -2.897369861602783, "logps/chosen": -275.49566650390625, "logps/rejected": -283.8038635253906, "loss": 0.3681, "rewards/accuracies": 0.75, "rewards/chosen": 0.33534103631973267, "rewards/margins": 1.918959140777588, "rewards/rejected": -1.5836180448532104, "step": 6553 }, { "epoch": 0.76, "learning_rate": 7.4446915603418e-08, "logits/chosen": -3.2628626823425293, "logits/rejected": -3.2031784057617188, "logps/chosen": -174.4482421875, "logps/rejected": -131.67965698242188, "loss": 0.5132, "rewards/accuracies": 0.75, "rewards/chosen": -0.3746340572834015, "rewards/margins": 1.8896312713623047, "rewards/rejected": -2.264265298843384, "step": 6554 }, { "epoch": 0.76, "learning_rate": 7.441179913379375e-08, "logits/chosen": -3.6295006275177, "logits/rejected": -3.622544765472412, "logps/chosen": -340.1664123535156, "logps/rejected": -265.8690185546875, "loss": 0.4776, "rewards/accuracies": 0.75, "rewards/chosen": -0.7618261575698853, "rewards/margins": 1.3425447940826416, "rewards/rejected": -2.1043710708618164, "step": 6555 }, { "epoch": 0.76, "learning_rate": 7.43766826641695e-08, "logits/chosen": -3.0347893238067627, "logits/rejected": -2.8768563270568848, "logps/chosen": -397.5435791015625, "logps/rejected": -216.635498046875, "loss": 0.2661, "rewards/accuracies": 0.875, "rewards/chosen": -0.09412747621536255, "rewards/margins": 1.7820837497711182, "rewards/rejected": -1.876211166381836, "step": 6556 }, { "epoch": 0.76, "learning_rate": 7.434156619454524e-08, "logits/chosen": -2.8980870246887207, "logits/rejected": -2.7699642181396484, "logps/chosen": -286.5835876464844, "logps/rejected": -198.95071411132812, "loss": 0.4486, "rewards/accuracies": 0.75, "rewards/chosen": -0.8167777061462402, "rewards/margins": 1.9117138385772705, "rewards/rejected": -2.7284913063049316, "step": 6557 }, { "epoch": 0.76, "learning_rate": 7.430644972492099e-08, "logits/chosen": -3.6914734840393066, "logits/rejected": -3.420201301574707, "logps/chosen": -330.1303405761719, "logps/rejected": -233.630615234375, "loss": 0.5795, "rewards/accuracies": 0.75, "rewards/chosen": -0.4414750933647156, "rewards/margins": 1.1753051280975342, "rewards/rejected": -1.6167802810668945, "step": 6558 }, { "epoch": 0.76, "learning_rate": 7.427133325529673e-08, "logits/chosen": -3.2196602821350098, "logits/rejected": -2.7053022384643555, "logps/chosen": -191.9552001953125, "logps/rejected": -210.07772827148438, "loss": 0.3756, "rewards/accuracies": 0.875, "rewards/chosen": -0.5732313990592957, "rewards/margins": 2.16591477394104, "rewards/rejected": -2.7391462326049805, "step": 6559 }, { "epoch": 0.76, "learning_rate": 7.423621678567247e-08, "logits/chosen": -3.453099012374878, "logits/rejected": -3.1872401237487793, "logps/chosen": -195.47744750976562, "logps/rejected": -231.50392150878906, "loss": 0.3645, "rewards/accuracies": 1.0, "rewards/chosen": -0.4133031964302063, "rewards/margins": 1.0815770626068115, "rewards/rejected": -1.494880199432373, "step": 6560 }, { "epoch": 0.76, "learning_rate": 7.420110031604822e-08, "logits/chosen": -2.5221362113952637, "logits/rejected": -2.389615535736084, "logps/chosen": -269.4752502441406, "logps/rejected": -215.35653686523438, "loss": 0.3649, "rewards/accuracies": 0.75, "rewards/chosen": 0.100715771317482, "rewards/margins": 1.5482146739959717, "rewards/rejected": -1.4474987983703613, "step": 6561 }, { "epoch": 0.76, "learning_rate": 7.416598384642397e-08, "logits/chosen": -3.8376359939575195, "logits/rejected": -3.818405866622925, "logps/chosen": -162.30712890625, "logps/rejected": -178.76254272460938, "loss": 0.2959, "rewards/accuracies": 1.0, "rewards/chosen": -0.26024556159973145, "rewards/margins": 1.7581939697265625, "rewards/rejected": -2.018439292907715, "step": 6562 }, { "epoch": 0.76, "learning_rate": 7.413086737679972e-08, "logits/chosen": -3.087174892425537, "logits/rejected": -3.609579563140869, "logps/chosen": -270.89105224609375, "logps/rejected": -223.346435546875, "loss": 0.4289, "rewards/accuracies": 0.75, "rewards/chosen": -0.5051583647727966, "rewards/margins": 3.220599412918091, "rewards/rejected": -3.725757598876953, "step": 6563 }, { "epoch": 0.76, "learning_rate": 7.409575090717546e-08, "logits/chosen": -3.2304420471191406, "logits/rejected": -2.9432637691497803, "logps/chosen": -173.7431640625, "logps/rejected": -219.8651580810547, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": 0.08300942182540894, "rewards/margins": 1.6432474851608276, "rewards/rejected": -1.5602381229400635, "step": 6564 }, { "epoch": 0.76, "learning_rate": 7.406063443755121e-08, "logits/chosen": -3.2650394439697266, "logits/rejected": -3.551687240600586, "logps/chosen": -382.53240966796875, "logps/rejected": -364.50823974609375, "loss": 0.2217, "rewards/accuracies": 1.0, "rewards/chosen": 0.06722836196422577, "rewards/margins": 2.3627755641937256, "rewards/rejected": -2.2955472469329834, "step": 6565 }, { "epoch": 0.76, "learning_rate": 7.402551796792695e-08, "logits/chosen": -3.673449993133545, "logits/rejected": -3.650412082672119, "logps/chosen": -116.31795501708984, "logps/rejected": -153.26809692382812, "loss": 0.3034, "rewards/accuracies": 1.0, "rewards/chosen": 0.07303275167942047, "rewards/margins": 1.7260956764221191, "rewards/rejected": -1.6530628204345703, "step": 6566 }, { "epoch": 0.76, "learning_rate": 7.399040149830271e-08, "logits/chosen": -3.037778615951538, "logits/rejected": -3.0889666080474854, "logps/chosen": -315.35723876953125, "logps/rejected": -297.5625915527344, "loss": 0.5119, "rewards/accuracies": 0.75, "rewards/chosen": -0.043115824460983276, "rewards/margins": 1.1498191356658936, "rewards/rejected": -1.1929349899291992, "step": 6567 }, { "epoch": 0.76, "learning_rate": 7.395528502867845e-08, "logits/chosen": -2.7946603298187256, "logits/rejected": -2.781637668609619, "logps/chosen": -229.590087890625, "logps/rejected": -322.6257019042969, "loss": 0.2951, "rewards/accuracies": 1.0, "rewards/chosen": 0.22338059544563293, "rewards/margins": 1.4999415874481201, "rewards/rejected": -1.2765610218048096, "step": 6568 }, { "epoch": 0.76, "learning_rate": 7.39201685590542e-08, "logits/chosen": -3.799649953842163, "logits/rejected": -3.091071605682373, "logps/chosen": -292.37030029296875, "logps/rejected": -172.63653564453125, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": 0.6050893664360046, "rewards/margins": 2.57327938079834, "rewards/rejected": -1.9681899547576904, "step": 6569 }, { "epoch": 0.76, "learning_rate": 7.388505208942994e-08, "logits/chosen": -2.6681320667266846, "logits/rejected": -2.2829763889312744, "logps/chosen": -232.72991943359375, "logps/rejected": -234.8534698486328, "loss": 0.721, "rewards/accuracies": 0.75, "rewards/chosen": -0.4991178512573242, "rewards/margins": 1.1059459447860718, "rewards/rejected": -1.605063796043396, "step": 6570 }, { "epoch": 0.76, "learning_rate": 7.384993561980568e-08, "logits/chosen": -2.693619966506958, "logits/rejected": -2.6823458671569824, "logps/chosen": -188.834228515625, "logps/rejected": -186.0283203125, "loss": 0.3496, "rewards/accuracies": 0.75, "rewards/chosen": 0.8836168050765991, "rewards/margins": 2.0868711471557617, "rewards/rejected": -1.2032543420791626, "step": 6571 }, { "epoch": 0.76, "learning_rate": 7.381481915018144e-08, "logits/chosen": -2.8669321537017822, "logits/rejected": -3.1156327724456787, "logps/chosen": -394.03985595703125, "logps/rejected": -349.4714050292969, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": -0.4411448836326599, "rewards/margins": 1.8779528141021729, "rewards/rejected": -2.3190977573394775, "step": 6572 }, { "epoch": 0.76, "learning_rate": 7.377970268055718e-08, "logits/chosen": -3.653082847595215, "logits/rejected": -3.659794569015503, "logps/chosen": -138.87667846679688, "logps/rejected": -165.00100708007812, "loss": 0.2209, "rewards/accuracies": 1.0, "rewards/chosen": 0.6658039689064026, "rewards/margins": 2.3833413124084473, "rewards/rejected": -1.7175371646881104, "step": 6573 }, { "epoch": 0.76, "learning_rate": 7.374458621093292e-08, "logits/chosen": -3.5320115089416504, "logits/rejected": -3.4295780658721924, "logps/chosen": -311.75421142578125, "logps/rejected": -240.383544921875, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": 0.20077693462371826, "rewards/margins": 1.664237380027771, "rewards/rejected": -1.4634605646133423, "step": 6574 }, { "epoch": 0.76, "learning_rate": 7.370946974130867e-08, "logits/chosen": -3.0961413383483887, "logits/rejected": -3.030369520187378, "logps/chosen": -223.82855224609375, "logps/rejected": -285.28643798828125, "loss": 0.3682, "rewards/accuracies": 0.875, "rewards/chosen": 0.018676668405532837, "rewards/margins": 1.618493676185608, "rewards/rejected": -1.599817156791687, "step": 6575 }, { "epoch": 0.76, "learning_rate": 7.367435327168441e-08, "logits/chosen": -2.709744691848755, "logits/rejected": -2.7605671882629395, "logps/chosen": -300.780029296875, "logps/rejected": -239.3838653564453, "loss": 0.5513, "rewards/accuracies": 0.75, "rewards/chosen": -0.22409212589263916, "rewards/margins": 1.4853479862213135, "rewards/rejected": -1.709439992904663, "step": 6576 }, { "epoch": 0.76, "learning_rate": 7.363923680206015e-08, "logits/chosen": -2.934593677520752, "logits/rejected": -2.5984690189361572, "logps/chosen": -341.60540771484375, "logps/rejected": -358.59320068359375, "loss": 0.1804, "rewards/accuracies": 0.875, "rewards/chosen": -0.11304014921188354, "rewards/margins": 2.775171995162964, "rewards/rejected": -2.888212203979492, "step": 6577 }, { "epoch": 0.76, "learning_rate": 7.360412033243591e-08, "logits/chosen": -2.4159858226776123, "logits/rejected": -2.5416922569274902, "logps/chosen": -148.5828399658203, "logps/rejected": -169.5196533203125, "loss": 0.6456, "rewards/accuracies": 0.75, "rewards/chosen": -0.3145846128463745, "rewards/margins": 1.2747557163238525, "rewards/rejected": -1.5893402099609375, "step": 6578 }, { "epoch": 0.76, "learning_rate": 7.356900386281165e-08, "logits/chosen": -2.7396240234375, "logits/rejected": -3.0702242851257324, "logps/chosen": -124.77307891845703, "logps/rejected": -189.300537109375, "loss": 0.324, "rewards/accuracies": 1.0, "rewards/chosen": 0.04652014374732971, "rewards/margins": 2.0168776512145996, "rewards/rejected": -1.9703575372695923, "step": 6579 }, { "epoch": 0.76, "learning_rate": 7.35338873931874e-08, "logits/chosen": -3.2427845001220703, "logits/rejected": -3.3166089057922363, "logps/chosen": -385.1775207519531, "logps/rejected": -345.2815246582031, "loss": 0.4005, "rewards/accuracies": 0.875, "rewards/chosen": -0.4405224621295929, "rewards/margins": 2.231431007385254, "rewards/rejected": -2.6719532012939453, "step": 6580 }, { "epoch": 0.76, "learning_rate": 7.349877092356314e-08, "logits/chosen": -3.1772992610931396, "logits/rejected": -3.5933563709259033, "logps/chosen": -285.979248046875, "logps/rejected": -338.789306640625, "loss": 0.2289, "rewards/accuracies": 0.875, "rewards/chosen": -0.1195153146982193, "rewards/margins": 3.2299652099609375, "rewards/rejected": -3.349480628967285, "step": 6581 }, { "epoch": 0.76, "learning_rate": 7.34636544539389e-08, "logits/chosen": -2.854369878768921, "logits/rejected": -3.33432674407959, "logps/chosen": -255.5343780517578, "logps/rejected": -345.756591796875, "loss": 0.3158, "rewards/accuracies": 0.875, "rewards/chosen": 0.03855688124895096, "rewards/margins": 3.160937547683716, "rewards/rejected": -3.1223807334899902, "step": 6582 }, { "epoch": 0.76, "learning_rate": 7.342853798431464e-08, "logits/chosen": -3.3230390548706055, "logits/rejected": -3.6015889644622803, "logps/chosen": -386.1002197265625, "logps/rejected": -301.2063903808594, "loss": 1.0807, "rewards/accuracies": 0.25, "rewards/chosen": -0.8801460266113281, "rewards/margins": -0.1642184853553772, "rewards/rejected": -0.7159274816513062, "step": 6583 }, { "epoch": 0.76, "learning_rate": 7.339342151469039e-08, "logits/chosen": -3.225224494934082, "logits/rejected": -3.286911725997925, "logps/chosen": -223.21884155273438, "logps/rejected": -247.82131958007812, "loss": 0.3229, "rewards/accuracies": 0.875, "rewards/chosen": 0.11151683330535889, "rewards/margins": 1.7528144121170044, "rewards/rejected": -1.641297459602356, "step": 6584 }, { "epoch": 0.76, "learning_rate": 7.335830504506613e-08, "logits/chosen": -3.4800806045532227, "logits/rejected": -3.612044095993042, "logps/chosen": -344.05462646484375, "logps/rejected": -362.6707763671875, "loss": 0.4374, "rewards/accuracies": 0.625, "rewards/chosen": 0.46219661831855774, "rewards/margins": 1.9599944353103638, "rewards/rejected": -1.497797966003418, "step": 6585 }, { "epoch": 0.76, "learning_rate": 7.332318857544189e-08, "logits/chosen": -2.48722505569458, "logits/rejected": -2.799565553665161, "logps/chosen": -293.5058898925781, "logps/rejected": -328.15283203125, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": 0.33406656980514526, "rewards/margins": 2.9243407249450684, "rewards/rejected": -2.5902740955352783, "step": 6586 }, { "epoch": 0.76, "learning_rate": 7.328807210581763e-08, "logits/chosen": -2.4580211639404297, "logits/rejected": -2.5151443481445312, "logps/chosen": -385.6964111328125, "logps/rejected": -387.6826171875, "loss": 0.1973, "rewards/accuracies": 1.0, "rewards/chosen": 0.3196598291397095, "rewards/margins": 2.1024599075317383, "rewards/rejected": -1.7827999591827393, "step": 6587 }, { "epoch": 0.76, "learning_rate": 7.325295563619337e-08, "logits/chosen": -2.8867688179016113, "logits/rejected": -2.915910482406616, "logps/chosen": -304.27899169921875, "logps/rejected": -221.94970703125, "loss": 0.3937, "rewards/accuracies": 1.0, "rewards/chosen": -0.5438318848609924, "rewards/margins": 1.2057363986968994, "rewards/rejected": -1.749568223953247, "step": 6588 }, { "epoch": 0.76, "learning_rate": 7.321783916656912e-08, "logits/chosen": -2.8618574142456055, "logits/rejected": -3.043779134750366, "logps/chosen": -370.2707214355469, "logps/rejected": -199.16653442382812, "loss": 0.2611, "rewards/accuracies": 0.875, "rewards/chosen": 0.6325554847717285, "rewards/margins": 2.4252612590789795, "rewards/rejected": -1.792705774307251, "step": 6589 }, { "epoch": 0.76, "learning_rate": 7.318272269694486e-08, "logits/chosen": -2.5004491806030273, "logits/rejected": -2.4756076335906982, "logps/chosen": -387.2206115722656, "logps/rejected": -300.1881103515625, "loss": 0.219, "rewards/accuracies": 0.875, "rewards/chosen": -0.12096215039491653, "rewards/margins": 2.4372568130493164, "rewards/rejected": -2.5582189559936523, "step": 6590 }, { "epoch": 0.76, "learning_rate": 7.31476062273206e-08, "logits/chosen": -2.73048734664917, "logits/rejected": -2.8974485397338867, "logps/chosen": -216.206787109375, "logps/rejected": -452.98919677734375, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -0.6143167614936829, "rewards/margins": 2.635281562805176, "rewards/rejected": -3.249598264694214, "step": 6591 }, { "epoch": 0.76, "learning_rate": 7.311248975769636e-08, "logits/chosen": -2.9269661903381348, "logits/rejected": -2.8026859760284424, "logps/chosen": -351.3409423828125, "logps/rejected": -222.53414916992188, "loss": 0.4706, "rewards/accuracies": 0.625, "rewards/chosen": -0.4823496341705322, "rewards/margins": 1.4993293285369873, "rewards/rejected": -1.9816789627075195, "step": 6592 }, { "epoch": 0.76, "learning_rate": 7.30773732880721e-08, "logits/chosen": -3.4206268787384033, "logits/rejected": -3.165123701095581, "logps/chosen": -219.85240173339844, "logps/rejected": -161.5966796875, "loss": 0.4695, "rewards/accuracies": 0.75, "rewards/chosen": -0.4366489052772522, "rewards/margins": 0.7438679337501526, "rewards/rejected": -1.1805168390274048, "step": 6593 }, { "epoch": 0.76, "learning_rate": 7.304225681844785e-08, "logits/chosen": -3.3184702396392822, "logits/rejected": -3.3513588905334473, "logps/chosen": -289.2336120605469, "logps/rejected": -285.96282958984375, "loss": 0.2545, "rewards/accuracies": 0.875, "rewards/chosen": -0.08510027080774307, "rewards/margins": 2.770881175994873, "rewards/rejected": -2.8559815883636475, "step": 6594 }, { "epoch": 0.76, "learning_rate": 7.300714034882359e-08, "logits/chosen": -3.0261547565460205, "logits/rejected": -3.293872117996216, "logps/chosen": -365.9212646484375, "logps/rejected": -216.68141174316406, "loss": 0.6271, "rewards/accuracies": 0.625, "rewards/chosen": -0.23850193619728088, "rewards/margins": 0.6997445225715637, "rewards/rejected": -0.9382464289665222, "step": 6595 }, { "epoch": 0.76, "learning_rate": 7.297202387919934e-08, "logits/chosen": -2.764916181564331, "logits/rejected": -2.733583688735962, "logps/chosen": -253.4749755859375, "logps/rejected": -271.82354736328125, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": -0.2616100013256073, "rewards/margins": 2.655777931213379, "rewards/rejected": -2.9173879623413086, "step": 6596 }, { "epoch": 0.76, "learning_rate": 7.293690740957509e-08, "logits/chosen": -3.0367796421051025, "logits/rejected": -2.8973419666290283, "logps/chosen": -119.31654357910156, "logps/rejected": -163.88279724121094, "loss": 0.151, "rewards/accuracies": 1.0, "rewards/chosen": 0.6820077896118164, "rewards/margins": 2.5302162170410156, "rewards/rejected": -1.8482081890106201, "step": 6597 }, { "epoch": 0.76, "learning_rate": 7.290179093995084e-08, "logits/chosen": -3.1570887565612793, "logits/rejected": -3.130584716796875, "logps/chosen": -281.2960205078125, "logps/rejected": -351.78338623046875, "loss": 0.6444, "rewards/accuracies": 0.625, "rewards/chosen": -0.26752883195877075, "rewards/margins": 1.0191702842712402, "rewards/rejected": -1.2866990566253662, "step": 6598 }, { "epoch": 0.76, "learning_rate": 7.286667447032658e-08, "logits/chosen": -3.4877877235412598, "logits/rejected": -3.5196831226348877, "logps/chosen": -141.37815856933594, "logps/rejected": -268.1093444824219, "loss": 0.4137, "rewards/accuracies": 0.75, "rewards/chosen": -0.015153162181377411, "rewards/margins": 2.7920899391174316, "rewards/rejected": -2.8072431087493896, "step": 6599 }, { "epoch": 0.76, "learning_rate": 7.283155800070233e-08, "logits/chosen": -3.2647998332977295, "logits/rejected": -3.3458619117736816, "logps/chosen": -207.10198974609375, "logps/rejected": -146.5789794921875, "loss": 0.3883, "rewards/accuracies": 0.75, "rewards/chosen": 0.22705727815628052, "rewards/margins": 1.4499037265777588, "rewards/rejected": -1.222846508026123, "step": 6600 }, { "epoch": 0.76, "learning_rate": 7.279644153107807e-08, "logits/chosen": -3.068354368209839, "logits/rejected": -3.0070900917053223, "logps/chosen": -154.88912963867188, "logps/rejected": -164.224853515625, "loss": 0.2916, "rewards/accuracies": 1.0, "rewards/chosen": -0.4312378764152527, "rewards/margins": 1.673109531402588, "rewards/rejected": -2.1043477058410645, "step": 6601 }, { "epoch": 0.76, "learning_rate": 7.276132506145382e-08, "logits/chosen": -2.479203224182129, "logits/rejected": -2.439589023590088, "logps/chosen": -410.0198974609375, "logps/rejected": -344.8086242675781, "loss": 0.2098, "rewards/accuracies": 1.0, "rewards/chosen": 0.07584428787231445, "rewards/margins": 1.9041697978973389, "rewards/rejected": -1.8283253908157349, "step": 6602 }, { "epoch": 0.76, "learning_rate": 7.272620859182957e-08, "logits/chosen": -3.464813709259033, "logits/rejected": -3.331787347793579, "logps/chosen": -210.48318481445312, "logps/rejected": -194.12054443359375, "loss": 0.4011, "rewards/accuracies": 0.875, "rewards/chosen": 0.040523916482925415, "rewards/margins": 1.0720840692520142, "rewards/rejected": -1.0315600633621216, "step": 6603 }, { "epoch": 0.76, "learning_rate": 7.269109212220531e-08, "logits/chosen": -2.8950209617614746, "logits/rejected": -2.8414676189422607, "logps/chosen": -244.26309204101562, "logps/rejected": -216.5749969482422, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": 0.2093818634748459, "rewards/margins": 2.6461052894592285, "rewards/rejected": -2.436723232269287, "step": 6604 }, { "epoch": 0.76, "learning_rate": 7.265597565258105e-08, "logits/chosen": -2.4789164066314697, "logits/rejected": -2.731724262237549, "logps/chosen": -415.61572265625, "logps/rejected": -332.0397033691406, "loss": 0.2105, "rewards/accuracies": 0.875, "rewards/chosen": 0.5051683187484741, "rewards/margins": 2.543698787689209, "rewards/rejected": -2.0385305881500244, "step": 6605 }, { "epoch": 0.76, "learning_rate": 7.26208591829568e-08, "logits/chosen": -3.04355525970459, "logits/rejected": -3.0584754943847656, "logps/chosen": -145.44419860839844, "logps/rejected": -183.7479248046875, "loss": 0.2454, "rewards/accuracies": 1.0, "rewards/chosen": -0.11683131009340286, "rewards/margins": 1.7397232055664062, "rewards/rejected": -1.8565545082092285, "step": 6606 }, { "epoch": 0.76, "learning_rate": 7.258574271333254e-08, "logits/chosen": -3.069213628768921, "logits/rejected": -3.0839767456054688, "logps/chosen": -382.6197509765625, "logps/rejected": -352.81036376953125, "loss": 0.4289, "rewards/accuracies": 0.75, "rewards/chosen": -0.4472340941429138, "rewards/margins": 1.5382022857666016, "rewards/rejected": -1.9854364395141602, "step": 6607 }, { "epoch": 0.76, "learning_rate": 7.25506262437083e-08, "logits/chosen": -2.5663466453552246, "logits/rejected": -2.448749303817749, "logps/chosen": -431.25640869140625, "logps/rejected": -407.8976135253906, "loss": 0.41, "rewards/accuracies": 0.75, "rewards/chosen": -0.08725781738758087, "rewards/margins": 1.3570433855056763, "rewards/rejected": -1.4443011283874512, "step": 6608 }, { "epoch": 0.76, "learning_rate": 7.251550977408404e-08, "logits/chosen": -3.1010258197784424, "logits/rejected": -3.592433452606201, "logps/chosen": -124.57841491699219, "logps/rejected": -231.12217712402344, "loss": 0.6285, "rewards/accuracies": 0.625, "rewards/chosen": -0.5493834018707275, "rewards/margins": 1.2481483221054077, "rewards/rejected": -1.7975317239761353, "step": 6609 }, { "epoch": 0.76, "learning_rate": 7.248039330445979e-08, "logits/chosen": -2.9423599243164062, "logits/rejected": -3.1406102180480957, "logps/chosen": -195.53724670410156, "logps/rejected": -139.0950927734375, "loss": 0.2655, "rewards/accuracies": 1.0, "rewards/chosen": -0.09939060360193253, "rewards/margins": 1.8934528827667236, "rewards/rejected": -1.9928436279296875, "step": 6610 }, { "epoch": 0.76, "learning_rate": 7.244527683483553e-08, "logits/chosen": -3.5579724311828613, "logits/rejected": -3.3107471466064453, "logps/chosen": -177.3703155517578, "logps/rejected": -177.27903747558594, "loss": 0.1512, "rewards/accuracies": 1.0, "rewards/chosen": 0.08712656050920486, "rewards/margins": 2.6514687538146973, "rewards/rejected": -2.5643422603607178, "step": 6611 }, { "epoch": 0.76, "learning_rate": 7.241016036521129e-08, "logits/chosen": -3.035994529724121, "logits/rejected": -3.455512285232544, "logps/chosen": -324.8985290527344, "logps/rejected": -379.5412902832031, "loss": 0.3732, "rewards/accuracies": 0.75, "rewards/chosen": 0.20846936106681824, "rewards/margins": 2.7955129146575928, "rewards/rejected": -2.587043523788452, "step": 6612 }, { "epoch": 0.76, "learning_rate": 7.237504389558703e-08, "logits/chosen": -3.4439263343811035, "logits/rejected": -3.084343910217285, "logps/chosen": -136.7746124267578, "logps/rejected": -274.33209228515625, "loss": 0.7127, "rewards/accuracies": 0.625, "rewards/chosen": -0.46779265999794006, "rewards/margins": 1.4575713872909546, "rewards/rejected": -1.9253641366958618, "step": 6613 }, { "epoch": 0.76, "learning_rate": 7.233992742596278e-08, "logits/chosen": -3.0896201133728027, "logits/rejected": -2.9064269065856934, "logps/chosen": -542.2271728515625, "logps/rejected": -318.9859619140625, "loss": 0.4517, "rewards/accuracies": 0.625, "rewards/chosen": 0.030910730361938477, "rewards/margins": 1.4932682514190674, "rewards/rejected": -1.4623576402664185, "step": 6614 }, { "epoch": 0.76, "learning_rate": 7.230481095633852e-08, "logits/chosen": -3.2109017372131348, "logits/rejected": -3.528550386428833, "logps/chosen": -137.2234649658203, "logps/rejected": -335.4786682128906, "loss": 0.3683, "rewards/accuracies": 0.75, "rewards/chosen": 0.08276181668043137, "rewards/margins": 2.5111641883850098, "rewards/rejected": -2.4284024238586426, "step": 6615 }, { "epoch": 0.76, "learning_rate": 7.226969448671426e-08, "logits/chosen": -2.943584442138672, "logits/rejected": -3.2587332725524902, "logps/chosen": -268.35321044921875, "logps/rejected": -353.7181091308594, "loss": 0.2004, "rewards/accuracies": 0.875, "rewards/chosen": 0.12791457772254944, "rewards/margins": 3.424621820449829, "rewards/rejected": -3.2967071533203125, "step": 6616 }, { "epoch": 0.76, "learning_rate": 7.223457801709002e-08, "logits/chosen": -2.634294271469116, "logits/rejected": -2.826751232147217, "logps/chosen": -314.0805969238281, "logps/rejected": -284.1463928222656, "loss": 0.5663, "rewards/accuracies": 0.75, "rewards/chosen": -0.7083326578140259, "rewards/margins": 1.1154899597167969, "rewards/rejected": -1.8238226175308228, "step": 6617 }, { "epoch": 0.76, "learning_rate": 7.219946154746576e-08, "logits/chosen": -2.7137644290924072, "logits/rejected": -2.4702041149139404, "logps/chosen": -162.53102111816406, "logps/rejected": -209.2808380126953, "loss": 0.3577, "rewards/accuracies": 0.75, "rewards/chosen": 0.0676794946193695, "rewards/margins": 2.1183981895446777, "rewards/rejected": -2.0507185459136963, "step": 6618 }, { "epoch": 0.76, "learning_rate": 7.21643450778415e-08, "logits/chosen": -3.59616756439209, "logits/rejected": -3.283940315246582, "logps/chosen": -138.39112854003906, "logps/rejected": -252.56558227539062, "loss": 0.3458, "rewards/accuracies": 0.875, "rewards/chosen": -0.0873841643333435, "rewards/margins": 2.8912107944488525, "rewards/rejected": -2.978595018386841, "step": 6619 }, { "epoch": 0.76, "learning_rate": 7.212922860821725e-08, "logits/chosen": -3.281773567199707, "logits/rejected": -3.1433534622192383, "logps/chosen": -450.8482666015625, "logps/rejected": -387.91387939453125, "loss": 0.536, "rewards/accuracies": 0.75, "rewards/chosen": -0.31919652223587036, "rewards/margins": 0.6780502796173096, "rewards/rejected": -0.9972468018531799, "step": 6620 }, { "epoch": 0.76, "learning_rate": 7.209411213859299e-08, "logits/chosen": -3.5525543689727783, "logits/rejected": -3.659085512161255, "logps/chosen": -212.59400939941406, "logps/rejected": -214.4329376220703, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.6282408237457275, "rewards/margins": 3.8010103702545166, "rewards/rejected": -3.172769546508789, "step": 6621 }, { "epoch": 0.76, "learning_rate": 7.205899566896873e-08, "logits/chosen": -3.501840591430664, "logits/rejected": -3.640934944152832, "logps/chosen": -366.5970458984375, "logps/rejected": -329.14691162109375, "loss": 0.1273, "rewards/accuracies": 0.875, "rewards/chosen": 0.44421055912971497, "rewards/margins": 4.386341094970703, "rewards/rejected": -3.9421300888061523, "step": 6622 }, { "epoch": 0.76, "learning_rate": 7.202387919934449e-08, "logits/chosen": -3.627495288848877, "logits/rejected": -3.5513572692871094, "logps/chosen": -250.59255981445312, "logps/rejected": -246.3236846923828, "loss": 0.409, "rewards/accuracies": 0.625, "rewards/chosen": -0.3748490512371063, "rewards/margins": 1.4611618518829346, "rewards/rejected": -1.8360109329223633, "step": 6623 }, { "epoch": 0.76, "learning_rate": 7.198876272972023e-08, "logits/chosen": -2.953254222869873, "logits/rejected": -3.1718297004699707, "logps/chosen": -260.9346923828125, "logps/rejected": -284.1019287109375, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": 0.13985252380371094, "rewards/margins": 3.4038891792297363, "rewards/rejected": -3.2640368938446045, "step": 6624 }, { "epoch": 0.76, "learning_rate": 7.195364626009598e-08, "logits/chosen": -2.8256802558898926, "logits/rejected": -2.689800262451172, "logps/chosen": -274.3408508300781, "logps/rejected": -280.6924743652344, "loss": 0.3296, "rewards/accuracies": 0.875, "rewards/chosen": 0.524655818939209, "rewards/margins": 2.620244026184082, "rewards/rejected": -2.095588207244873, "step": 6625 }, { "epoch": 0.76, "learning_rate": 7.191852979047172e-08, "logits/chosen": -3.253890037536621, "logits/rejected": -3.3274035453796387, "logps/chosen": -349.3419494628906, "logps/rejected": -321.2421875, "loss": 0.2225, "rewards/accuracies": 1.0, "rewards/chosen": 0.10230762511491776, "rewards/margins": 2.3544728755950928, "rewards/rejected": -2.2521653175354004, "step": 6626 }, { "epoch": 0.76, "learning_rate": 7.188341332084748e-08, "logits/chosen": -2.6496565341949463, "logits/rejected": -3.2538821697235107, "logps/chosen": -293.84222412109375, "logps/rejected": -279.19268798828125, "loss": 0.6286, "rewards/accuracies": 0.75, "rewards/chosen": 0.27105391025543213, "rewards/margins": 1.2021445035934448, "rewards/rejected": -0.9310905337333679, "step": 6627 }, { "epoch": 0.76, "learning_rate": 7.184829685122322e-08, "logits/chosen": -3.8767013549804688, "logits/rejected": -3.9895691871643066, "logps/chosen": -236.79617309570312, "logps/rejected": -275.4668884277344, "loss": 0.2359, "rewards/accuracies": 1.0, "rewards/chosen": -0.17204219102859497, "rewards/margins": 2.416640281677246, "rewards/rejected": -2.5886824131011963, "step": 6628 }, { "epoch": 0.76, "learning_rate": 7.181318038159897e-08, "logits/chosen": -3.6003079414367676, "logits/rejected": -3.406780481338501, "logps/chosen": -385.0673522949219, "logps/rejected": -255.81765747070312, "loss": 0.4563, "rewards/accuracies": 0.75, "rewards/chosen": 0.6024448275566101, "rewards/margins": 1.4827666282653809, "rewards/rejected": -0.8803219795227051, "step": 6629 }, { "epoch": 0.76, "learning_rate": 7.177806391197471e-08, "logits/chosen": -2.565763235092163, "logits/rejected": -3.053691864013672, "logps/chosen": -269.202880859375, "logps/rejected": -309.23834228515625, "loss": 0.2889, "rewards/accuracies": 0.75, "rewards/chosen": 0.2925536632537842, "rewards/margins": 2.461045026779175, "rewards/rejected": -2.1684916019439697, "step": 6630 }, { "epoch": 0.76, "learning_rate": 7.174294744235047e-08, "logits/chosen": -3.250678539276123, "logits/rejected": -2.79311466217041, "logps/chosen": -204.64784240722656, "logps/rejected": -173.93641662597656, "loss": 1.0101, "rewards/accuracies": 0.625, "rewards/chosen": -0.5714042782783508, "rewards/margins": -0.042747579514980316, "rewards/rejected": -0.5286567211151123, "step": 6631 }, { "epoch": 0.76, "learning_rate": 7.17078309727262e-08, "logits/chosen": -2.7324862480163574, "logits/rejected": -2.448641061782837, "logps/chosen": -399.866455078125, "logps/rejected": -241.06732177734375, "loss": 0.2154, "rewards/accuracies": 0.875, "rewards/chosen": 0.2321067750453949, "rewards/margins": 2.325801134109497, "rewards/rejected": -2.0936942100524902, "step": 6632 }, { "epoch": 0.76, "learning_rate": 7.167271450310195e-08, "logits/chosen": -2.811323642730713, "logits/rejected": -2.7569775581359863, "logps/chosen": -103.21920776367188, "logps/rejected": -268.3199462890625, "loss": 0.2326, "rewards/accuracies": 0.875, "rewards/chosen": -0.3497190475463867, "rewards/margins": 2.877138614654541, "rewards/rejected": -3.226858139038086, "step": 6633 }, { "epoch": 0.76, "learning_rate": 7.16375980334777e-08, "logits/chosen": -3.074343204498291, "logits/rejected": -3.1325931549072266, "logps/chosen": -159.3836669921875, "logps/rejected": -293.4352111816406, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": 0.1650058478116989, "rewards/margins": 2.8727447986602783, "rewards/rejected": -2.7077388763427734, "step": 6634 }, { "epoch": 0.76, "learning_rate": 7.160248156385344e-08, "logits/chosen": -2.786348342895508, "logits/rejected": -2.751363754272461, "logps/chosen": -226.59130859375, "logps/rejected": -234.95745849609375, "loss": 0.6938, "rewards/accuracies": 0.5, "rewards/chosen": -0.8025518655776978, "rewards/margins": 0.8964369893074036, "rewards/rejected": -1.6989887952804565, "step": 6635 }, { "epoch": 0.77, "learning_rate": 7.156736509422918e-08, "logits/chosen": -3.1613729000091553, "logits/rejected": -3.12076997756958, "logps/chosen": -352.45343017578125, "logps/rejected": -250.51727294921875, "loss": 0.2797, "rewards/accuracies": 1.0, "rewards/chosen": 0.09752234816551208, "rewards/margins": 1.8817801475524902, "rewards/rejected": -1.7842576503753662, "step": 6636 }, { "epoch": 0.77, "learning_rate": 7.153224862460494e-08, "logits/chosen": -3.170042037963867, "logits/rejected": -2.876321792602539, "logps/chosen": -397.5350341796875, "logps/rejected": -249.6288299560547, "loss": 0.0954, "rewards/accuracies": 1.0, "rewards/chosen": -0.01793639361858368, "rewards/margins": 2.633105754852295, "rewards/rejected": -2.6510422229766846, "step": 6637 }, { "epoch": 0.77, "learning_rate": 7.149713215498068e-08, "logits/chosen": -3.916865348815918, "logits/rejected": -3.617325782775879, "logps/chosen": -282.83282470703125, "logps/rejected": -254.2771453857422, "loss": 0.6052, "rewards/accuracies": 0.625, "rewards/chosen": -0.21804767847061157, "rewards/margins": 0.9236118793487549, "rewards/rejected": -1.1416594982147217, "step": 6638 }, { "epoch": 0.77, "learning_rate": 7.146201568535643e-08, "logits/chosen": -3.4050631523132324, "logits/rejected": -3.0166072845458984, "logps/chosen": -364.1839294433594, "logps/rejected": -260.59027099609375, "loss": 0.1856, "rewards/accuracies": 1.0, "rewards/chosen": 0.423850417137146, "rewards/margins": 2.8847544193267822, "rewards/rejected": -2.460904121398926, "step": 6639 }, { "epoch": 0.77, "learning_rate": 7.142689921573217e-08, "logits/chosen": -3.8831093311309814, "logits/rejected": -3.748851776123047, "logps/chosen": -411.30816650390625, "logps/rejected": -311.664306640625, "loss": 0.5335, "rewards/accuracies": 0.75, "rewards/chosen": 0.018756315112113953, "rewards/margins": 1.118965744972229, "rewards/rejected": -1.1002094745635986, "step": 6640 }, { "epoch": 0.77, "learning_rate": 7.139178274610792e-08, "logits/chosen": -2.618786573410034, "logits/rejected": -2.669257879257202, "logps/chosen": -441.3109130859375, "logps/rejected": -335.4688720703125, "loss": 0.5434, "rewards/accuracies": 0.75, "rewards/chosen": 0.048127755522727966, "rewards/margins": 1.0577514171600342, "rewards/rejected": -1.009623646736145, "step": 6641 }, { "epoch": 0.77, "learning_rate": 7.135666627648366e-08, "logits/chosen": -2.6366286277770996, "logits/rejected": -2.548802375793457, "logps/chosen": -218.60044860839844, "logps/rejected": -215.73910522460938, "loss": 0.3389, "rewards/accuracies": 0.75, "rewards/chosen": -0.039543554186820984, "rewards/margins": 2.1690666675567627, "rewards/rejected": -2.2086100578308105, "step": 6642 }, { "epoch": 0.77, "learning_rate": 7.132154980685942e-08, "logits/chosen": -3.4602255821228027, "logits/rejected": -3.6024680137634277, "logps/chosen": -175.3814697265625, "logps/rejected": -175.419921875, "loss": 0.4408, "rewards/accuracies": 0.875, "rewards/chosen": 0.3367140293121338, "rewards/margins": 1.3090533018112183, "rewards/rejected": -0.9723392128944397, "step": 6643 }, { "epoch": 0.77, "learning_rate": 7.128643333723516e-08, "logits/chosen": -2.9610910415649414, "logits/rejected": -2.975351572036743, "logps/chosen": -169.8816680908203, "logps/rejected": -238.64947509765625, "loss": 0.389, "rewards/accuracies": 0.875, "rewards/chosen": -0.5003246068954468, "rewards/margins": 2.149965763092041, "rewards/rejected": -2.6502902507781982, "step": 6644 }, { "epoch": 0.77, "learning_rate": 7.125131686761091e-08, "logits/chosen": -3.1338865756988525, "logits/rejected": -3.1535000801086426, "logps/chosen": -250.19027709960938, "logps/rejected": -214.16444396972656, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": 0.26782864332199097, "rewards/margins": 1.165224552154541, "rewards/rejected": -0.89739590883255, "step": 6645 }, { "epoch": 0.77, "learning_rate": 7.121620039798665e-08, "logits/chosen": -3.5538368225097656, "logits/rejected": -3.584465503692627, "logps/chosen": -376.7685241699219, "logps/rejected": -244.937255859375, "loss": 0.3278, "rewards/accuracies": 0.875, "rewards/chosen": -0.2897607386112213, "rewards/margins": 1.399794578552246, "rewards/rejected": -1.6895554065704346, "step": 6646 }, { "epoch": 0.77, "learning_rate": 7.11810839283624e-08, "logits/chosen": -3.461435317993164, "logits/rejected": -3.579923629760742, "logps/chosen": -238.83140563964844, "logps/rejected": -269.3743591308594, "loss": 0.3737, "rewards/accuracies": 0.75, "rewards/chosen": 0.1338203251361847, "rewards/margins": 2.5245859622955322, "rewards/rejected": -2.39076566696167, "step": 6647 }, { "epoch": 0.77, "learning_rate": 7.114596745873815e-08, "logits/chosen": -2.7149853706359863, "logits/rejected": -2.9587318897247314, "logps/chosen": -331.7323913574219, "logps/rejected": -207.65380859375, "loss": 0.3228, "rewards/accuracies": 0.875, "rewards/chosen": 0.7141863107681274, "rewards/margins": 1.951920509338379, "rewards/rejected": -1.237734079360962, "step": 6648 }, { "epoch": 0.77, "learning_rate": 7.111085098911389e-08, "logits/chosen": -2.763516664505005, "logits/rejected": -2.8042593002319336, "logps/chosen": -331.2662353515625, "logps/rejected": -303.4269104003906, "loss": 0.1619, "rewards/accuracies": 1.0, "rewards/chosen": 0.20973020792007446, "rewards/margins": 2.8871233463287354, "rewards/rejected": -2.6773931980133057, "step": 6649 }, { "epoch": 0.77, "learning_rate": 7.107573451948963e-08, "logits/chosen": -2.8691704273223877, "logits/rejected": -2.6528162956237793, "logps/chosen": -342.21435546875, "logps/rejected": -269.13677978515625, "loss": 0.2727, "rewards/accuracies": 0.875, "rewards/chosen": 0.40638381242752075, "rewards/margins": 2.143589735031128, "rewards/rejected": -1.7372061014175415, "step": 6650 }, { "epoch": 0.77, "learning_rate": 7.104061804986538e-08, "logits/chosen": -3.4815313816070557, "logits/rejected": -3.369663715362549, "logps/chosen": -344.0582275390625, "logps/rejected": -278.8072204589844, "loss": 0.2923, "rewards/accuracies": 0.875, "rewards/chosen": 0.5736908912658691, "rewards/margins": 1.7734524011611938, "rewards/rejected": -1.1997616291046143, "step": 6651 }, { "epoch": 0.77, "learning_rate": 7.100550158024112e-08, "logits/chosen": -3.1271679401397705, "logits/rejected": -3.412827491760254, "logps/chosen": -192.120361328125, "logps/rejected": -193.65478515625, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": 0.1226220354437828, "rewards/margins": 2.0264298915863037, "rewards/rejected": -1.9038078784942627, "step": 6652 }, { "epoch": 0.77, "learning_rate": 7.097038511061688e-08, "logits/chosen": -3.3209421634674072, "logits/rejected": -3.6667425632476807, "logps/chosen": -152.48516845703125, "logps/rejected": -171.1188507080078, "loss": 0.3763, "rewards/accuracies": 0.875, "rewards/chosen": -0.1553243100643158, "rewards/margins": 2.03448748588562, "rewards/rejected": -2.1898114681243896, "step": 6653 }, { "epoch": 0.77, "learning_rate": 7.093526864099262e-08, "logits/chosen": -2.957042694091797, "logits/rejected": -2.5807225704193115, "logps/chosen": -326.23101806640625, "logps/rejected": -416.5695495605469, "loss": 0.4976, "rewards/accuracies": 0.75, "rewards/chosen": -0.26947540044784546, "rewards/margins": 1.2970519065856934, "rewards/rejected": -1.5665273666381836, "step": 6654 }, { "epoch": 0.77, "learning_rate": 7.090015217136837e-08, "logits/chosen": -3.4777767658233643, "logits/rejected": -3.6004629135131836, "logps/chosen": -357.6923522949219, "logps/rejected": -329.5279541015625, "loss": 0.3539, "rewards/accuracies": 0.875, "rewards/chosen": -0.30500689148902893, "rewards/margins": 1.3186278343200684, "rewards/rejected": -1.623634696006775, "step": 6655 }, { "epoch": 0.77, "learning_rate": 7.086503570174411e-08, "logits/chosen": -3.143831491470337, "logits/rejected": -3.323920726776123, "logps/chosen": -205.88693237304688, "logps/rejected": -174.4905242919922, "loss": 0.363, "rewards/accuracies": 0.875, "rewards/chosen": -0.05166993290185928, "rewards/margins": 1.30592942237854, "rewards/rejected": -1.357599139213562, "step": 6656 }, { "epoch": 0.77, "learning_rate": 7.082991923211987e-08, "logits/chosen": -3.742042064666748, "logits/rejected": -3.53684663772583, "logps/chosen": -161.99057006835938, "logps/rejected": -224.2655029296875, "loss": 0.224, "rewards/accuracies": 1.0, "rewards/chosen": 0.0058508701622486115, "rewards/margins": 2.103346586227417, "rewards/rejected": -2.0974957942962646, "step": 6657 }, { "epoch": 0.77, "learning_rate": 7.079480276249561e-08, "logits/chosen": -2.90797758102417, "logits/rejected": -3.008984088897705, "logps/chosen": -285.32415771484375, "logps/rejected": -241.7268524169922, "loss": 0.2385, "rewards/accuracies": 1.0, "rewards/chosen": 0.15643614530563354, "rewards/margins": 2.2269272804260254, "rewards/rejected": -2.070491313934326, "step": 6658 }, { "epoch": 0.77, "learning_rate": 7.075968629287136e-08, "logits/chosen": -3.953744411468506, "logits/rejected": -3.896888017654419, "logps/chosen": -107.08198547363281, "logps/rejected": -168.3124237060547, "loss": 0.2167, "rewards/accuracies": 1.0, "rewards/chosen": -0.09443998336791992, "rewards/margins": 2.375124216079712, "rewards/rejected": -2.469564437866211, "step": 6659 }, { "epoch": 0.77, "learning_rate": 7.07245698232471e-08, "logits/chosen": -3.9205381870269775, "logits/rejected": -3.880673885345459, "logps/chosen": -171.4766845703125, "logps/rejected": -141.7198944091797, "loss": 0.3566, "rewards/accuracies": 0.75, "rewards/chosen": -0.05676780641078949, "rewards/margins": 1.5634183883666992, "rewards/rejected": -1.6201860904693604, "step": 6660 }, { "epoch": 0.77, "learning_rate": 7.068945335362286e-08, "logits/chosen": -2.9770309925079346, "logits/rejected": -2.7663726806640625, "logps/chosen": -593.0457763671875, "logps/rejected": -283.5436706542969, "loss": 0.4398, "rewards/accuracies": 0.75, "rewards/chosen": 0.059510380029678345, "rewards/margins": 1.713625192642212, "rewards/rejected": -1.654114842414856, "step": 6661 }, { "epoch": 0.77, "learning_rate": 7.06543368839986e-08, "logits/chosen": -3.2980799674987793, "logits/rejected": -3.4159045219421387, "logps/chosen": -126.28732299804688, "logps/rejected": -130.4154052734375, "loss": 0.5646, "rewards/accuracies": 0.75, "rewards/chosen": -0.6020300388336182, "rewards/margins": 0.33821365237236023, "rewards/rejected": -0.9402437210083008, "step": 6662 }, { "epoch": 0.77, "learning_rate": 7.061922041437434e-08, "logits/chosen": -2.407522439956665, "logits/rejected": -2.7059106826782227, "logps/chosen": -401.61114501953125, "logps/rejected": -251.32638549804688, "loss": 0.2221, "rewards/accuracies": 0.875, "rewards/chosen": 0.8054996132850647, "rewards/margins": 2.454982042312622, "rewards/rejected": -1.6494823694229126, "step": 6663 }, { "epoch": 0.77, "learning_rate": 7.058410394475008e-08, "logits/chosen": -3.126997470855713, "logits/rejected": -3.265401840209961, "logps/chosen": -129.668212890625, "logps/rejected": -239.59869384765625, "loss": 0.2923, "rewards/accuracies": 0.875, "rewards/chosen": -0.1536291241645813, "rewards/margins": 2.4989748001098633, "rewards/rejected": -2.652604103088379, "step": 6664 }, { "epoch": 0.77, "learning_rate": 7.054898747512583e-08, "logits/chosen": -3.0334644317626953, "logits/rejected": -2.9264278411865234, "logps/chosen": -547.9782104492188, "logps/rejected": -377.5182189941406, "loss": 0.7001, "rewards/accuracies": 0.75, "rewards/chosen": -0.5975138545036316, "rewards/margins": 0.7485095262527466, "rewards/rejected": -1.346023440361023, "step": 6665 }, { "epoch": 0.77, "learning_rate": 7.051387100550157e-08, "logits/chosen": -3.206604480743408, "logits/rejected": -3.213071346282959, "logps/chosen": -310.5701904296875, "logps/rejected": -199.7858428955078, "loss": 0.7248, "rewards/accuracies": 0.5, "rewards/chosen": -0.30779728293418884, "rewards/margins": 0.45612674951553345, "rewards/rejected": -0.7639240026473999, "step": 6666 }, { "epoch": 0.77, "learning_rate": 7.047875453587733e-08, "logits/chosen": -3.0999605655670166, "logits/rejected": -3.1131157875061035, "logps/chosen": -187.26983642578125, "logps/rejected": -247.4287567138672, "loss": 0.1693, "rewards/accuracies": 1.0, "rewards/chosen": 0.278534471988678, "rewards/margins": 2.5923070907592773, "rewards/rejected": -2.313772678375244, "step": 6667 }, { "epoch": 0.77, "learning_rate": 7.044363806625307e-08, "logits/chosen": -2.938352584838867, "logits/rejected": -3.012949228286743, "logps/chosen": -295.8277893066406, "logps/rejected": -250.16600036621094, "loss": 0.2989, "rewards/accuracies": 0.875, "rewards/chosen": -0.29879462718963623, "rewards/margins": 1.6152398586273193, "rewards/rejected": -1.914034366607666, "step": 6668 }, { "epoch": 0.77, "learning_rate": 7.040852159662882e-08, "logits/chosen": -2.5871517658233643, "logits/rejected": -2.361168146133423, "logps/chosen": -194.32699584960938, "logps/rejected": -208.1702880859375, "loss": 0.4451, "rewards/accuracies": 0.625, "rewards/chosen": -0.3959242105484009, "rewards/margins": 1.2236363887786865, "rewards/rejected": -1.6195604801177979, "step": 6669 }, { "epoch": 0.77, "learning_rate": 7.037340512700456e-08, "logits/chosen": -2.299639940261841, "logits/rejected": -2.3219780921936035, "logps/chosen": -207.6853485107422, "logps/rejected": -233.10833740234375, "loss": 0.4056, "rewards/accuracies": 0.875, "rewards/chosen": -0.5891304612159729, "rewards/margins": 1.2911396026611328, "rewards/rejected": -1.8802701234817505, "step": 6670 }, { "epoch": 0.77, "learning_rate": 7.03382886573803e-08, "logits/chosen": -3.1508078575134277, "logits/rejected": -3.2363715171813965, "logps/chosen": -194.78114318847656, "logps/rejected": -209.69784545898438, "loss": 0.4915, "rewards/accuracies": 0.875, "rewards/chosen": -0.5257551074028015, "rewards/margins": 1.1251301765441895, "rewards/rejected": -1.6508853435516357, "step": 6671 }, { "epoch": 0.77, "learning_rate": 7.030317218775606e-08, "logits/chosen": -3.638791561126709, "logits/rejected": -3.496417284011841, "logps/chosen": -287.460205078125, "logps/rejected": -246.27737426757812, "loss": 0.4654, "rewards/accuracies": 0.75, "rewards/chosen": -0.6346075534820557, "rewards/margins": 1.455268383026123, "rewards/rejected": -2.0898756980895996, "step": 6672 }, { "epoch": 0.77, "learning_rate": 7.02680557181318e-08, "logits/chosen": -2.658177375793457, "logits/rejected": -2.5061192512512207, "logps/chosen": -121.78240966796875, "logps/rejected": -216.1166229248047, "loss": 0.3421, "rewards/accuracies": 1.0, "rewards/chosen": -0.22282692790031433, "rewards/margins": 1.6694157123565674, "rewards/rejected": -1.8922427892684937, "step": 6673 }, { "epoch": 0.77, "learning_rate": 7.023293924850755e-08, "logits/chosen": -3.0012474060058594, "logits/rejected": -3.2033538818359375, "logps/chosen": -195.69949340820312, "logps/rejected": -164.20249938964844, "loss": 1.0208, "rewards/accuracies": 0.375, "rewards/chosen": -1.0998694896697998, "rewards/margins": -0.4673287570476532, "rewards/rejected": -0.632540762424469, "step": 6674 }, { "epoch": 0.77, "learning_rate": 7.019782277888329e-08, "logits/chosen": -3.6494016647338867, "logits/rejected": -3.647291660308838, "logps/chosen": -305.6177978515625, "logps/rejected": -291.98211669921875, "loss": 0.3706, "rewards/accuracies": 0.75, "rewards/chosen": -0.005999669432640076, "rewards/margins": 1.6800059080123901, "rewards/rejected": -1.6860055923461914, "step": 6675 }, { "epoch": 0.77, "learning_rate": 7.016270630925904e-08, "logits/chosen": -2.1459193229675293, "logits/rejected": -2.3247013092041016, "logps/chosen": -304.48175048828125, "logps/rejected": -278.87744140625, "loss": 0.3691, "rewards/accuracies": 0.875, "rewards/chosen": -0.19487211108207703, "rewards/margins": 2.0786263942718506, "rewards/rejected": -2.27349853515625, "step": 6676 }, { "epoch": 0.77, "learning_rate": 7.012758983963479e-08, "logits/chosen": -2.4983439445495605, "logits/rejected": -2.3572745323181152, "logps/chosen": -368.85272216796875, "logps/rejected": -395.04937744140625, "loss": 0.5107, "rewards/accuracies": 0.75, "rewards/chosen": -0.08674860000610352, "rewards/margins": 0.9968321323394775, "rewards/rejected": -1.083580732345581, "step": 6677 }, { "epoch": 0.77, "learning_rate": 7.009247337001054e-08, "logits/chosen": -3.2500767707824707, "logits/rejected": -3.1882596015930176, "logps/chosen": -307.1824035644531, "logps/rejected": -298.03289794921875, "loss": 0.4929, "rewards/accuracies": 0.625, "rewards/chosen": -0.41428425908088684, "rewards/margins": 1.2904757261276245, "rewards/rejected": -1.7047600746154785, "step": 6678 }, { "epoch": 0.77, "learning_rate": 7.005735690038628e-08, "logits/chosen": -3.0657925605773926, "logits/rejected": -2.844149112701416, "logps/chosen": -267.1283874511719, "logps/rejected": -245.80654907226562, "loss": 0.2491, "rewards/accuracies": 1.0, "rewards/chosen": 0.053504228591918945, "rewards/margins": 2.0084354877471924, "rewards/rejected": -1.9549312591552734, "step": 6679 }, { "epoch": 0.77, "learning_rate": 7.002224043076202e-08, "logits/chosen": -3.683476448059082, "logits/rejected": -3.304522752761841, "logps/chosen": -232.00079345703125, "logps/rejected": -151.9200897216797, "loss": 0.6956, "rewards/accuracies": 0.625, "rewards/chosen": -0.5242166519165039, "rewards/margins": 0.9087802171707153, "rewards/rejected": -1.4329968690872192, "step": 6680 }, { "epoch": 0.77, "learning_rate": 6.998712396113776e-08, "logits/chosen": -2.591982364654541, "logits/rejected": -2.9178860187530518, "logps/chosen": -253.76431274414062, "logps/rejected": -218.83712768554688, "loss": 0.545, "rewards/accuracies": 0.75, "rewards/chosen": -0.46065133810043335, "rewards/margins": 2.1175832748413086, "rewards/rejected": -2.5782344341278076, "step": 6681 }, { "epoch": 0.77, "learning_rate": 6.995200749151351e-08, "logits/chosen": -3.4662060737609863, "logits/rejected": -3.510436534881592, "logps/chosen": -182.4345703125, "logps/rejected": -218.57492065429688, "loss": 0.2201, "rewards/accuracies": 1.0, "rewards/chosen": -0.14503085613250732, "rewards/margins": 1.7174546718597412, "rewards/rejected": -1.862485647201538, "step": 6682 }, { "epoch": 0.77, "learning_rate": 6.991689102188926e-08, "logits/chosen": -3.5678062438964844, "logits/rejected": -3.510648250579834, "logps/chosen": -273.54888916015625, "logps/rejected": -229.04791259765625, "loss": 0.2161, "rewards/accuracies": 0.875, "rewards/chosen": -0.05583646893501282, "rewards/margins": 1.977405071258545, "rewards/rejected": -2.0332415103912354, "step": 6683 }, { "epoch": 0.77, "learning_rate": 6.988177455226501e-08, "logits/chosen": -3.1037189960479736, "logits/rejected": -2.9564051628112793, "logps/chosen": -169.9220733642578, "logps/rejected": -156.9796600341797, "loss": 0.4259, "rewards/accuracies": 0.75, "rewards/chosen": -0.051321886479854584, "rewards/margins": 1.4328068494796753, "rewards/rejected": -1.484128713607788, "step": 6684 }, { "epoch": 0.77, "learning_rate": 6.984665808264075e-08, "logits/chosen": -4.028126239776611, "logits/rejected": -3.4977357387542725, "logps/chosen": -285.17303466796875, "logps/rejected": -243.04954528808594, "loss": 0.7534, "rewards/accuracies": 0.875, "rewards/chosen": -0.6664621829986572, "rewards/margins": 1.6112804412841797, "rewards/rejected": -2.277742624282837, "step": 6685 }, { "epoch": 0.77, "learning_rate": 6.98115416130165e-08, "logits/chosen": -3.0857648849487305, "logits/rejected": -2.881385326385498, "logps/chosen": -207.3128662109375, "logps/rejected": -205.68197631835938, "loss": 0.3716, "rewards/accuracies": 0.875, "rewards/chosen": -0.40231770277023315, "rewards/margins": 1.7757490873336792, "rewards/rejected": -2.1780667304992676, "step": 6686 }, { "epoch": 0.77, "learning_rate": 6.977642514339224e-08, "logits/chosen": -3.3062264919281006, "logits/rejected": -3.8700459003448486, "logps/chosen": -327.03826904296875, "logps/rejected": -308.7749328613281, "loss": 0.2786, "rewards/accuracies": 0.75, "rewards/chosen": 0.4860183596611023, "rewards/margins": 3.414578437805176, "rewards/rejected": -2.928560256958008, "step": 6687 }, { "epoch": 0.77, "learning_rate": 6.9741308673768e-08, "logits/chosen": -3.3069076538085938, "logits/rejected": -2.9672775268554688, "logps/chosen": -197.88490295410156, "logps/rejected": -231.03875732421875, "loss": 0.3622, "rewards/accuracies": 0.75, "rewards/chosen": -0.3107379674911499, "rewards/margins": 1.9883081912994385, "rewards/rejected": -2.299046039581299, "step": 6688 }, { "epoch": 0.77, "learning_rate": 6.970619220414374e-08, "logits/chosen": -2.2867507934570312, "logits/rejected": -2.3371777534484863, "logps/chosen": -329.5579833984375, "logps/rejected": -273.6596984863281, "loss": 0.3441, "rewards/accuracies": 0.75, "rewards/chosen": -0.1117854118347168, "rewards/margins": 1.8767683506011963, "rewards/rejected": -1.9885536432266235, "step": 6689 }, { "epoch": 0.77, "learning_rate": 6.967107573451949e-08, "logits/chosen": -2.403434991836548, "logits/rejected": -2.490604877471924, "logps/chosen": -321.72454833984375, "logps/rejected": -294.1709899902344, "loss": 0.378, "rewards/accuracies": 0.75, "rewards/chosen": 0.1586051881313324, "rewards/margins": 1.4651830196380615, "rewards/rejected": -1.3065778017044067, "step": 6690 }, { "epoch": 0.77, "learning_rate": 6.963595926489523e-08, "logits/chosen": -3.4632797241210938, "logits/rejected": -2.8650028705596924, "logps/chosen": -224.56494140625, "logps/rejected": -221.48805236816406, "loss": 0.5331, "rewards/accuracies": 0.75, "rewards/chosen": -0.09693184494972229, "rewards/margins": 0.7122924327850342, "rewards/rejected": -0.8092243075370789, "step": 6691 }, { "epoch": 0.77, "learning_rate": 6.960084279527099e-08, "logits/chosen": -2.65057373046875, "logits/rejected": -2.860349178314209, "logps/chosen": -269.19403076171875, "logps/rejected": -252.79336547851562, "loss": 0.3104, "rewards/accuracies": 0.875, "rewards/chosen": -0.005473703145980835, "rewards/margins": 2.2291879653930664, "rewards/rejected": -2.23466157913208, "step": 6692 }, { "epoch": 0.77, "learning_rate": 6.956572632564673e-08, "logits/chosen": -2.7140681743621826, "logits/rejected": -2.541001319885254, "logps/chosen": -173.672119140625, "logps/rejected": -200.89979553222656, "loss": 0.2195, "rewards/accuracies": 1.0, "rewards/chosen": 0.3719238042831421, "rewards/margins": 1.954592227935791, "rewards/rejected": -1.5826683044433594, "step": 6693 }, { "epoch": 0.77, "learning_rate": 6.953060985602247e-08, "logits/chosen": -2.369978904724121, "logits/rejected": -2.412426710128784, "logps/chosen": -491.5198059082031, "logps/rejected": -305.1441650390625, "loss": 0.5659, "rewards/accuracies": 0.625, "rewards/chosen": -0.3981339931488037, "rewards/margins": 0.8056747913360596, "rewards/rejected": -1.2038087844848633, "step": 6694 }, { "epoch": 0.77, "learning_rate": 6.949549338639822e-08, "logits/chosen": -3.1163010597229004, "logits/rejected": -2.7899465560913086, "logps/chosen": -249.82479858398438, "logps/rejected": -238.3206787109375, "loss": 0.4163, "rewards/accuracies": 0.75, "rewards/chosen": -0.6367157101631165, "rewards/margins": 0.979875922203064, "rewards/rejected": -1.6165915727615356, "step": 6695 }, { "epoch": 0.77, "learning_rate": 6.946037691677396e-08, "logits/chosen": -2.7141871452331543, "logits/rejected": -2.260397434234619, "logps/chosen": -238.58926391601562, "logps/rejected": -253.93014526367188, "loss": 0.3872, "rewards/accuracies": 0.75, "rewards/chosen": -0.3174598813056946, "rewards/margins": 2.9219038486480713, "rewards/rejected": -3.2393639087677, "step": 6696 }, { "epoch": 0.77, "learning_rate": 6.94252604471497e-08, "logits/chosen": -3.612842559814453, "logits/rejected": -3.3571887016296387, "logps/chosen": -202.368408203125, "logps/rejected": -128.33477783203125, "loss": 0.628, "rewards/accuracies": 0.75, "rewards/chosen": -0.7427929043769836, "rewards/margins": 0.3290589451789856, "rewards/rejected": -1.0718518495559692, "step": 6697 }, { "epoch": 0.77, "learning_rate": 6.939014397752546e-08, "logits/chosen": -2.4758729934692383, "logits/rejected": -2.435983896255493, "logps/chosen": -231.92620849609375, "logps/rejected": -224.98638916015625, "loss": 0.5777, "rewards/accuracies": 0.5, "rewards/chosen": -0.08920292556285858, "rewards/margins": 0.8306562900543213, "rewards/rejected": -0.9198591709136963, "step": 6698 }, { "epoch": 0.77, "learning_rate": 6.93550275079012e-08, "logits/chosen": -2.631636381149292, "logits/rejected": -2.5341367721557617, "logps/chosen": -167.33358764648438, "logps/rejected": -202.0367889404297, "loss": 0.5301, "rewards/accuracies": 0.875, "rewards/chosen": 0.14251533150672913, "rewards/margins": 1.1371806859970093, "rewards/rejected": -0.994665265083313, "step": 6699 }, { "epoch": 0.77, "learning_rate": 6.931991103827695e-08, "logits/chosen": -3.4491076469421387, "logits/rejected": -3.3249030113220215, "logps/chosen": -280.7845153808594, "logps/rejected": -340.880126953125, "loss": 0.3092, "rewards/accuracies": 0.75, "rewards/chosen": 0.23002813756465912, "rewards/margins": 2.1367220878601074, "rewards/rejected": -1.906693935394287, "step": 6700 }, { "epoch": 0.77, "learning_rate": 6.928479456865269e-08, "logits/chosen": -4.165854454040527, "logits/rejected": -3.9465556144714355, "logps/chosen": -308.9183349609375, "logps/rejected": -217.78736877441406, "loss": 0.3733, "rewards/accuracies": 0.75, "rewards/chosen": -0.2005363404750824, "rewards/margins": 2.523404121398926, "rewards/rejected": -2.723940372467041, "step": 6701 }, { "epoch": 0.77, "learning_rate": 6.924967809902845e-08, "logits/chosen": -3.0210328102111816, "logits/rejected": -3.1567909717559814, "logps/chosen": -249.5545654296875, "logps/rejected": -296.9821472167969, "loss": 0.3312, "rewards/accuracies": 0.875, "rewards/chosen": -0.028331607580184937, "rewards/margins": 1.805163025856018, "rewards/rejected": -1.8334946632385254, "step": 6702 }, { "epoch": 0.77, "learning_rate": 6.921456162940419e-08, "logits/chosen": -2.8079795837402344, "logits/rejected": -2.703418254852295, "logps/chosen": -250.61407470703125, "logps/rejected": -208.05105590820312, "loss": 0.509, "rewards/accuracies": 0.75, "rewards/chosen": -0.38428425788879395, "rewards/margins": 1.064389705657959, "rewards/rejected": -1.448673963546753, "step": 6703 }, { "epoch": 0.77, "learning_rate": 6.917944515977994e-08, "logits/chosen": -3.065878391265869, "logits/rejected": -3.3464512825012207, "logps/chosen": -207.82421875, "logps/rejected": -180.16165161132812, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": -0.10318169742822647, "rewards/margins": 1.5119572877883911, "rewards/rejected": -1.6151390075683594, "step": 6704 }, { "epoch": 0.77, "learning_rate": 6.914432869015568e-08, "logits/chosen": -2.4405694007873535, "logits/rejected": -2.912033796310425, "logps/chosen": -171.74766540527344, "logps/rejected": -220.39625549316406, "loss": 0.3265, "rewards/accuracies": 0.875, "rewards/chosen": 0.20217232406139374, "rewards/margins": 1.8300931453704834, "rewards/rejected": -1.6279206275939941, "step": 6705 }, { "epoch": 0.77, "learning_rate": 6.910921222053144e-08, "logits/chosen": -3.0810532569885254, "logits/rejected": -2.9901366233825684, "logps/chosen": -201.556884765625, "logps/rejected": -231.21929931640625, "loss": 0.4034, "rewards/accuracies": 0.75, "rewards/chosen": 0.16011367738246918, "rewards/margins": 1.2109529972076416, "rewards/rejected": -1.0508393049240112, "step": 6706 }, { "epoch": 0.77, "learning_rate": 6.907409575090718e-08, "logits/chosen": -3.664017677307129, "logits/rejected": -3.8751847743988037, "logps/chosen": -128.16595458984375, "logps/rejected": -208.48812866210938, "loss": 0.3481, "rewards/accuracies": 1.0, "rewards/chosen": 0.02687002532184124, "rewards/margins": 1.6927893161773682, "rewards/rejected": -1.665919542312622, "step": 6707 }, { "epoch": 0.77, "learning_rate": 6.903897928128292e-08, "logits/chosen": -3.779604434967041, "logits/rejected": -3.592599868774414, "logps/chosen": -331.807861328125, "logps/rejected": -268.95294189453125, "loss": 0.5366, "rewards/accuracies": 0.625, "rewards/chosen": -0.34618818759918213, "rewards/margins": 1.5669677257537842, "rewards/rejected": -1.9131557941436768, "step": 6708 }, { "epoch": 0.77, "learning_rate": 6.900386281165867e-08, "logits/chosen": -1.9799443483352661, "logits/rejected": -2.3324592113494873, "logps/chosen": -388.5754699707031, "logps/rejected": -304.8343200683594, "loss": 0.2636, "rewards/accuracies": 1.0, "rewards/chosen": 0.041544921696186066, "rewards/margins": 1.6176334619522095, "rewards/rejected": -1.576088547706604, "step": 6709 }, { "epoch": 0.77, "learning_rate": 6.896874634203441e-08, "logits/chosen": -3.3241117000579834, "logits/rejected": -3.6553640365600586, "logps/chosen": -156.78369140625, "logps/rejected": -184.84396362304688, "loss": 0.4084, "rewards/accuracies": 0.75, "rewards/chosen": -0.5225830078125, "rewards/margins": 1.7056708335876465, "rewards/rejected": -2.2282538414001465, "step": 6710 }, { "epoch": 0.77, "learning_rate": 6.893362987241015e-08, "logits/chosen": -2.772982597351074, "logits/rejected": -2.9071977138519287, "logps/chosen": -292.4184875488281, "logps/rejected": -298.1151123046875, "loss": 0.4043, "rewards/accuracies": 0.75, "rewards/chosen": -0.6170984506607056, "rewards/margins": 2.4586594104766846, "rewards/rejected": -3.0757577419281006, "step": 6711 }, { "epoch": 0.77, "learning_rate": 6.88985134027859e-08, "logits/chosen": -2.6297972202301025, "logits/rejected": -2.673067331314087, "logps/chosen": -212.06954956054688, "logps/rejected": -323.43798828125, "loss": 0.3525, "rewards/accuracies": 0.875, "rewards/chosen": -0.1884596347808838, "rewards/margins": 2.799792766571045, "rewards/rejected": -2.9882524013519287, "step": 6712 }, { "epoch": 0.77, "learning_rate": 6.886339693316165e-08, "logits/chosen": -3.1794915199279785, "logits/rejected": -3.0216410160064697, "logps/chosen": -152.64456176757812, "logps/rejected": -226.64984130859375, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": 0.31350061297416687, "rewards/margins": 3.30137038230896, "rewards/rejected": -2.9878697395324707, "step": 6713 }, { "epoch": 0.77, "learning_rate": 6.88282804635374e-08, "logits/chosen": -2.8323333263397217, "logits/rejected": -2.676301956176758, "logps/chosen": -170.78399658203125, "logps/rejected": -266.1107177734375, "loss": 0.1678, "rewards/accuracies": 1.0, "rewards/chosen": 0.47175097465515137, "rewards/margins": 2.5664544105529785, "rewards/rejected": -2.094703197479248, "step": 6714 }, { "epoch": 0.77, "learning_rate": 6.879316399391314e-08, "logits/chosen": -3.6261677742004395, "logits/rejected": -3.4445090293884277, "logps/chosen": -209.65435791015625, "logps/rejected": -152.09315490722656, "loss": 0.4167, "rewards/accuracies": 0.75, "rewards/chosen": -0.34582990407943726, "rewards/margins": 1.3734242916107178, "rewards/rejected": -1.7192540168762207, "step": 6715 }, { "epoch": 0.77, "learning_rate": 6.87580475242889e-08, "logits/chosen": -3.9616589546203613, "logits/rejected": -4.160224437713623, "logps/chosen": -448.9508972167969, "logps/rejected": -395.95074462890625, "loss": 0.4243, "rewards/accuracies": 0.875, "rewards/chosen": 0.1326984465122223, "rewards/margins": 1.4185104370117188, "rewards/rejected": -1.2858119010925293, "step": 6716 }, { "epoch": 0.77, "learning_rate": 6.872293105466463e-08, "logits/chosen": -3.589077949523926, "logits/rejected": -3.7929000854492188, "logps/chosen": -97.75932312011719, "logps/rejected": -164.31167602539062, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.5288053154945374, "rewards/margins": 2.668259620666504, "rewards/rejected": -2.1394546031951904, "step": 6717 }, { "epoch": 0.77, "learning_rate": 6.868781458504038e-08, "logits/chosen": -3.2190661430358887, "logits/rejected": -3.196746587753296, "logps/chosen": -131.4701385498047, "logps/rejected": -240.82192993164062, "loss": 0.336, "rewards/accuracies": 0.875, "rewards/chosen": 0.2786732017993927, "rewards/margins": 2.012192726135254, "rewards/rejected": -1.7335196733474731, "step": 6718 }, { "epoch": 0.77, "learning_rate": 6.865269811541613e-08, "logits/chosen": -2.5830647945404053, "logits/rejected": -2.7446999549865723, "logps/chosen": -197.2930908203125, "logps/rejected": -220.03729248046875, "loss": 0.5581, "rewards/accuracies": 0.5, "rewards/chosen": -0.6865454912185669, "rewards/margins": 1.1993141174316406, "rewards/rejected": -1.885859489440918, "step": 6719 }, { "epoch": 0.77, "learning_rate": 6.861758164579187e-08, "logits/chosen": -2.7141997814178467, "logits/rejected": -3.18156099319458, "logps/chosen": -194.86984252929688, "logps/rejected": -290.25103759765625, "loss": 0.3386, "rewards/accuracies": 0.75, "rewards/chosen": -0.5052317380905151, "rewards/margins": 2.147275447845459, "rewards/rejected": -2.6525073051452637, "step": 6720 }, { "epoch": 0.77, "learning_rate": 6.858246517616762e-08, "logits/chosen": -3.4171056747436523, "logits/rejected": -3.4894349575042725, "logps/chosen": -254.89073181152344, "logps/rejected": -218.66152954101562, "loss": 0.3948, "rewards/accuracies": 0.75, "rewards/chosen": -0.1206553652882576, "rewards/margins": 2.0240213871002197, "rewards/rejected": -2.144676685333252, "step": 6721 }, { "epoch": 0.77, "learning_rate": 6.854734870654336e-08, "logits/chosen": -3.953467845916748, "logits/rejected": -3.4559316635131836, "logps/chosen": -222.63113403320312, "logps/rejected": -149.2471160888672, "loss": 0.5424, "rewards/accuracies": 0.625, "rewards/chosen": -0.4133056700229645, "rewards/margins": 1.4775272607803345, "rewards/rejected": -1.8908329010009766, "step": 6722 }, { "epoch": 0.78, "learning_rate": 6.851223223691912e-08, "logits/chosen": -3.2970376014709473, "logits/rejected": -3.287259578704834, "logps/chosen": -74.08699035644531, "logps/rejected": -177.20535278320312, "loss": 0.314, "rewards/accuracies": 0.875, "rewards/chosen": -0.2937467098236084, "rewards/margins": 1.9125843048095703, "rewards/rejected": -2.2063307762145996, "step": 6723 }, { "epoch": 0.78, "learning_rate": 6.847711576729486e-08, "logits/chosen": -3.0797908306121826, "logits/rejected": -3.4974417686462402, "logps/chosen": -96.86317443847656, "logps/rejected": -199.33294677734375, "loss": 0.2675, "rewards/accuracies": 0.875, "rewards/chosen": 0.09369315952062607, "rewards/margins": 2.2629525661468506, "rewards/rejected": -2.1692593097686768, "step": 6724 }, { "epoch": 0.78, "learning_rate": 6.84419992976706e-08, "logits/chosen": -2.5442748069763184, "logits/rejected": -2.6962130069732666, "logps/chosen": -333.09405517578125, "logps/rejected": -353.643798828125, "loss": 0.2489, "rewards/accuracies": 0.875, "rewards/chosen": 0.11452893912792206, "rewards/margins": 3.045459270477295, "rewards/rejected": -2.9309303760528564, "step": 6725 }, { "epoch": 0.78, "learning_rate": 6.840688282804635e-08, "logits/chosen": -3.0283076763153076, "logits/rejected": -3.076552152633667, "logps/chosen": -127.31571960449219, "logps/rejected": -241.12603759765625, "loss": 0.8069, "rewards/accuracies": 0.875, "rewards/chosen": -0.8784939050674438, "rewards/margins": 1.5343942642211914, "rewards/rejected": -2.4128880500793457, "step": 6726 }, { "epoch": 0.78, "learning_rate": 6.83717663584221e-08, "logits/chosen": -3.639894962310791, "logits/rejected": -3.145113468170166, "logps/chosen": -125.79888916015625, "logps/rejected": -113.52177429199219, "loss": 0.5894, "rewards/accuracies": 0.75, "rewards/chosen": -0.27103590965270996, "rewards/margins": 0.8505220413208008, "rewards/rejected": -1.1215579509735107, "step": 6727 }, { "epoch": 0.78, "learning_rate": 6.833664988879783e-08, "logits/chosen": -2.985213041305542, "logits/rejected": -2.97275447845459, "logps/chosen": -238.44332885742188, "logps/rejected": -400.7316589355469, "loss": 0.2834, "rewards/accuracies": 0.875, "rewards/chosen": -0.3571183383464813, "rewards/margins": 2.4925742149353027, "rewards/rejected": -2.8496925830841064, "step": 6728 }, { "epoch": 0.78, "learning_rate": 6.830153341917359e-08, "logits/chosen": -3.8371853828430176, "logits/rejected": -4.0859270095825195, "logps/chosen": -209.26126098632812, "logps/rejected": -244.828369140625, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -0.37621572613716125, "rewards/margins": 2.1031761169433594, "rewards/rejected": -2.479391574859619, "step": 6729 }, { "epoch": 0.78, "learning_rate": 6.826641694954933e-08, "logits/chosen": -3.2145960330963135, "logits/rejected": -2.757065773010254, "logps/chosen": -282.7794189453125, "logps/rejected": -288.9834899902344, "loss": 0.4508, "rewards/accuracies": 0.75, "rewards/chosen": -0.4973795413970947, "rewards/margins": 1.7604317665100098, "rewards/rejected": -2.2578113079071045, "step": 6730 }, { "epoch": 0.78, "learning_rate": 6.823130047992508e-08, "logits/chosen": -3.1276392936706543, "logits/rejected": -3.2401773929595947, "logps/chosen": -164.40130615234375, "logps/rejected": -211.66004943847656, "loss": 0.2822, "rewards/accuracies": 0.875, "rewards/chosen": 0.4409925639629364, "rewards/margins": 2.64860200881958, "rewards/rejected": -2.207609176635742, "step": 6731 }, { "epoch": 0.78, "learning_rate": 6.819618401030082e-08, "logits/chosen": -2.721419095993042, "logits/rejected": -2.879438877105713, "logps/chosen": -205.6863250732422, "logps/rejected": -365.1981506347656, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": 0.08001469075679779, "rewards/margins": 1.901955246925354, "rewards/rejected": -1.8219406604766846, "step": 6732 }, { "epoch": 0.78, "learning_rate": 6.816106754067658e-08, "logits/chosen": -3.140615463256836, "logits/rejected": -2.9657602310180664, "logps/chosen": -189.47483825683594, "logps/rejected": -248.07785034179688, "loss": 0.2646, "rewards/accuracies": 0.875, "rewards/chosen": 0.19043980538845062, "rewards/margins": 2.8594305515289307, "rewards/rejected": -2.6689910888671875, "step": 6733 }, { "epoch": 0.78, "learning_rate": 6.812595107105232e-08, "logits/chosen": -3.192077159881592, "logits/rejected": -3.2576041221618652, "logps/chosen": -292.09600830078125, "logps/rejected": -383.9446105957031, "loss": 0.3518, "rewards/accuracies": 0.75, "rewards/chosen": 0.5639198422431946, "rewards/margins": 3.098264694213867, "rewards/rejected": -2.5343449115753174, "step": 6734 }, { "epoch": 0.78, "learning_rate": 6.809083460142807e-08, "logits/chosen": -3.1543610095977783, "logits/rejected": -3.1077637672424316, "logps/chosen": -253.5218505859375, "logps/rejected": -260.5858154296875, "loss": 0.9603, "rewards/accuracies": 0.625, "rewards/chosen": -1.7870556116104126, "rewards/margins": 0.790581226348877, "rewards/rejected": -2.57763671875, "step": 6735 }, { "epoch": 0.78, "learning_rate": 6.805571813180381e-08, "logits/chosen": -3.3754167556762695, "logits/rejected": -3.0748298168182373, "logps/chosen": -194.5589141845703, "logps/rejected": -154.12843322753906, "loss": 0.3291, "rewards/accuracies": 0.875, "rewards/chosen": -0.48940175771713257, "rewards/margins": 1.190769076347351, "rewards/rejected": -1.6801708936691284, "step": 6736 }, { "epoch": 0.78, "learning_rate": 6.802060166217957e-08, "logits/chosen": -2.3164124488830566, "logits/rejected": -2.206270456314087, "logps/chosen": -167.54647827148438, "logps/rejected": -346.6318359375, "loss": 0.2911, "rewards/accuracies": 0.875, "rewards/chosen": 0.49878543615341187, "rewards/margins": 2.0892560482025146, "rewards/rejected": -1.590470552444458, "step": 6737 }, { "epoch": 0.78, "learning_rate": 6.798548519255531e-08, "logits/chosen": -2.5072081089019775, "logits/rejected": -2.5344462394714355, "logps/chosen": -391.7557678222656, "logps/rejected": -384.0386047363281, "loss": 0.7118, "rewards/accuracies": 0.625, "rewards/chosen": -0.6141217947006226, "rewards/margins": 0.5466809272766113, "rewards/rejected": -1.1608027219772339, "step": 6738 }, { "epoch": 0.78, "learning_rate": 6.795036872293105e-08, "logits/chosen": -3.234917402267456, "logits/rejected": -3.0003583431243896, "logps/chosen": -140.91015625, "logps/rejected": -212.99766540527344, "loss": 0.4784, "rewards/accuracies": 0.625, "rewards/chosen": -0.5570802688598633, "rewards/margins": 0.9766693115234375, "rewards/rejected": -1.5337495803833008, "step": 6739 }, { "epoch": 0.78, "learning_rate": 6.79152522533068e-08, "logits/chosen": -3.507350444793701, "logits/rejected": -3.6469550132751465, "logps/chosen": -300.06341552734375, "logps/rejected": -289.1601257324219, "loss": 0.4252, "rewards/accuracies": 0.875, "rewards/chosen": 0.21855475008487701, "rewards/margins": 1.4474685192108154, "rewards/rejected": -1.2289139032363892, "step": 6740 }, { "epoch": 0.78, "learning_rate": 6.788013578368254e-08, "logits/chosen": -3.430755138397217, "logits/rejected": -3.4893906116485596, "logps/chosen": -233.16452026367188, "logps/rejected": -234.4214324951172, "loss": 0.3757, "rewards/accuracies": 0.75, "rewards/chosen": -0.48202037811279297, "rewards/margins": 1.6968953609466553, "rewards/rejected": -2.1789157390594482, "step": 6741 }, { "epoch": 0.78, "learning_rate": 6.784501931405828e-08, "logits/chosen": -2.916330337524414, "logits/rejected": -3.223453998565674, "logps/chosen": -380.7151794433594, "logps/rejected": -315.21337890625, "loss": 0.5683, "rewards/accuracies": 0.75, "rewards/chosen": -0.11315461993217468, "rewards/margins": 0.6936452984809875, "rewards/rejected": -0.8067998886108398, "step": 6742 }, { "epoch": 0.78, "learning_rate": 6.780990284443404e-08, "logits/chosen": -2.292304515838623, "logits/rejected": -2.218099594116211, "logps/chosen": -379.0765686035156, "logps/rejected": -304.52001953125, "loss": 0.7581, "rewards/accuracies": 0.375, "rewards/chosen": 0.03990750014781952, "rewards/margins": 0.5408694744110107, "rewards/rejected": -0.5009620189666748, "step": 6743 }, { "epoch": 0.78, "learning_rate": 6.777478637480978e-08, "logits/chosen": -3.335839033126831, "logits/rejected": -3.1805925369262695, "logps/chosen": -159.94342041015625, "logps/rejected": -213.87875366210938, "loss": 0.4119, "rewards/accuracies": 0.75, "rewards/chosen": 0.15493294596672058, "rewards/margins": 1.755934715270996, "rewards/rejected": -1.6010017395019531, "step": 6744 }, { "epoch": 0.78, "learning_rate": 6.773966990518553e-08, "logits/chosen": -3.0763607025146484, "logits/rejected": -3.220470905303955, "logps/chosen": -193.21755981445312, "logps/rejected": -246.71707153320312, "loss": 0.3546, "rewards/accuracies": 0.875, "rewards/chosen": 0.157790407538414, "rewards/margins": 1.964208722114563, "rewards/rejected": -1.8064182996749878, "step": 6745 }, { "epoch": 0.78, "learning_rate": 6.770455343556127e-08, "logits/chosen": -4.093331336975098, "logits/rejected": -3.918449878692627, "logps/chosen": -273.9168701171875, "logps/rejected": -225.55035400390625, "loss": 0.4165, "rewards/accuracies": 0.625, "rewards/chosen": -0.0437413826584816, "rewards/margins": 1.8876534700393677, "rewards/rejected": -1.9313948154449463, "step": 6746 }, { "epoch": 0.78, "learning_rate": 6.766943696593703e-08, "logits/chosen": -2.823594093322754, "logits/rejected": -2.828108549118042, "logps/chosen": -178.00807189941406, "logps/rejected": -167.67849731445312, "loss": 0.3685, "rewards/accuracies": 0.75, "rewards/chosen": -0.14032909274101257, "rewards/margins": 1.632938265800476, "rewards/rejected": -1.773267388343811, "step": 6747 }, { "epoch": 0.78, "learning_rate": 6.763432049631277e-08, "logits/chosen": -3.5244622230529785, "logits/rejected": -2.9853432178497314, "logps/chosen": -326.2583312988281, "logps/rejected": -375.45306396484375, "loss": 0.4111, "rewards/accuracies": 0.75, "rewards/chosen": 0.3484554588794708, "rewards/margins": 1.8319854736328125, "rewards/rejected": -1.4835301637649536, "step": 6748 }, { "epoch": 0.78, "learning_rate": 6.759920402668852e-08, "logits/chosen": -3.5182995796203613, "logits/rejected": -3.472938299179077, "logps/chosen": -178.6217041015625, "logps/rejected": -212.73318481445312, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.2701353132724762, "rewards/margins": 1.1895174980163574, "rewards/rejected": -1.4596527814865112, "step": 6749 }, { "epoch": 0.78, "learning_rate": 6.756408755706426e-08, "logits/chosen": -3.1557199954986572, "logits/rejected": -3.30847430229187, "logps/chosen": -186.4058837890625, "logps/rejected": -349.13092041015625, "loss": 0.4101, "rewards/accuracies": 0.75, "rewards/chosen": 0.15429064631462097, "rewards/margins": 1.6989136934280396, "rewards/rejected": -1.5446232557296753, "step": 6750 }, { "epoch": 0.78, "learning_rate": 6.752897108744001e-08, "logits/chosen": -2.824800729751587, "logits/rejected": -2.909651279449463, "logps/chosen": -237.24766540527344, "logps/rejected": -248.22439575195312, "loss": 0.2015, "rewards/accuracies": 0.875, "rewards/chosen": -0.04972691833972931, "rewards/margins": 2.4869258403778076, "rewards/rejected": -2.5366525650024414, "step": 6751 }, { "epoch": 0.78, "learning_rate": 6.749385461781576e-08, "logits/chosen": -2.609004497528076, "logits/rejected": -2.7477595806121826, "logps/chosen": -244.7191162109375, "logps/rejected": -409.5538635253906, "loss": 0.151, "rewards/accuracies": 1.0, "rewards/chosen": 0.06118001043796539, "rewards/margins": 2.9354121685028076, "rewards/rejected": -2.874232292175293, "step": 6752 }, { "epoch": 0.78, "learning_rate": 6.74587381481915e-08, "logits/chosen": -3.1830174922943115, "logits/rejected": -3.420806407928467, "logps/chosen": -366.7705078125, "logps/rejected": -322.2606201171875, "loss": 0.2428, "rewards/accuracies": 0.875, "rewards/chosen": -0.03745002672076225, "rewards/margins": 1.7738221883773804, "rewards/rejected": -1.8112722635269165, "step": 6753 }, { "epoch": 0.78, "learning_rate": 6.742362167856725e-08, "logits/chosen": -3.294076919555664, "logits/rejected": -3.3481709957122803, "logps/chosen": -104.99702453613281, "logps/rejected": -195.2013702392578, "loss": 0.3271, "rewards/accuracies": 0.875, "rewards/chosen": 0.17698076367378235, "rewards/margins": 2.5153186321258545, "rewards/rejected": -2.3383378982543945, "step": 6754 }, { "epoch": 0.78, "learning_rate": 6.738850520894299e-08, "logits/chosen": -3.2617311477661133, "logits/rejected": -3.1979317665100098, "logps/chosen": -250.09423828125, "logps/rejected": -282.5974426269531, "loss": 0.4308, "rewards/accuracies": 0.875, "rewards/chosen": -0.41982129216194153, "rewards/margins": 2.2229840755462646, "rewards/rejected": -2.642805576324463, "step": 6755 }, { "epoch": 0.78, "learning_rate": 6.735338873931873e-08, "logits/chosen": -2.6085855960845947, "logits/rejected": -2.5268099308013916, "logps/chosen": -385.3382873535156, "logps/rejected": -295.58770751953125, "loss": 0.5526, "rewards/accuracies": 0.75, "rewards/chosen": 0.1918623149394989, "rewards/margins": 1.3135995864868164, "rewards/rejected": -1.1217373609542847, "step": 6756 }, { "epoch": 0.78, "learning_rate": 6.731827226969448e-08, "logits/chosen": -3.7514588832855225, "logits/rejected": -3.706843852996826, "logps/chosen": -352.1486511230469, "logps/rejected": -377.5674743652344, "loss": 0.7044, "rewards/accuracies": 0.625, "rewards/chosen": -0.6865320205688477, "rewards/margins": 1.8352543115615845, "rewards/rejected": -2.5217862129211426, "step": 6757 }, { "epoch": 0.78, "learning_rate": 6.728315580007023e-08, "logits/chosen": -3.1319174766540527, "logits/rejected": -2.9577760696411133, "logps/chosen": -346.95306396484375, "logps/rejected": -297.95489501953125, "loss": 0.3384, "rewards/accuracies": 0.875, "rewards/chosen": -0.23028407990932465, "rewards/margins": 2.3285415172576904, "rewards/rejected": -2.5588254928588867, "step": 6758 }, { "epoch": 0.78, "learning_rate": 6.724803933044598e-08, "logits/chosen": -3.114828109741211, "logits/rejected": -3.174327850341797, "logps/chosen": -287.14227294921875, "logps/rejected": -172.8885040283203, "loss": 0.4205, "rewards/accuracies": 0.75, "rewards/chosen": 0.050583165138959885, "rewards/margins": 1.3143872022628784, "rewards/rejected": -1.2638039588928223, "step": 6759 }, { "epoch": 0.78, "learning_rate": 6.721292286082172e-08, "logits/chosen": -2.3377041816711426, "logits/rejected": -2.2714385986328125, "logps/chosen": -445.12890625, "logps/rejected": -344.93212890625, "loss": 0.5583, "rewards/accuracies": 0.875, "rewards/chosen": -0.18782201409339905, "rewards/margins": 1.1310091018676758, "rewards/rejected": -1.3188310861587524, "step": 6760 }, { "epoch": 0.78, "learning_rate": 6.717780639119747e-08, "logits/chosen": -3.6790966987609863, "logits/rejected": -3.457943916320801, "logps/chosen": -190.5650634765625, "logps/rejected": -183.3907470703125, "loss": 0.3711, "rewards/accuracies": 0.75, "rewards/chosen": 0.03913339972496033, "rewards/margins": 1.7355867624282837, "rewards/rejected": -1.69645357131958, "step": 6761 }, { "epoch": 0.78, "learning_rate": 6.714268992157321e-08, "logits/chosen": -2.9540276527404785, "logits/rejected": -3.2087879180908203, "logps/chosen": -310.9033203125, "logps/rejected": -377.8594665527344, "loss": 0.3364, "rewards/accuracies": 0.875, "rewards/chosen": -0.07904690504074097, "rewards/margins": 2.6294355392456055, "rewards/rejected": -2.708482503890991, "step": 6762 }, { "epoch": 0.78, "learning_rate": 6.710757345194897e-08, "logits/chosen": -3.121577262878418, "logits/rejected": -2.9825241565704346, "logps/chosen": -190.72232055664062, "logps/rejected": -249.54039001464844, "loss": 0.2601, "rewards/accuracies": 0.875, "rewards/chosen": -0.32170891761779785, "rewards/margins": 2.3561086654663086, "rewards/rejected": -2.6778173446655273, "step": 6763 }, { "epoch": 0.78, "learning_rate": 6.707245698232471e-08, "logits/chosen": -2.6934216022491455, "logits/rejected": -2.5509557723999023, "logps/chosen": -219.39537048339844, "logps/rejected": -198.03453063964844, "loss": 0.3693, "rewards/accuracies": 0.625, "rewards/chosen": -0.053799454122781754, "rewards/margins": 2.354480266571045, "rewards/rejected": -2.4082798957824707, "step": 6764 }, { "epoch": 0.78, "learning_rate": 6.703734051270045e-08, "logits/chosen": -3.3928158283233643, "logits/rejected": -3.656370162963867, "logps/chosen": -200.74713134765625, "logps/rejected": -196.04647827148438, "loss": 0.4213, "rewards/accuracies": 0.875, "rewards/chosen": 0.24045610427856445, "rewards/margins": 1.6359903812408447, "rewards/rejected": -1.3955341577529907, "step": 6765 }, { "epoch": 0.78, "learning_rate": 6.70022240430762e-08, "logits/chosen": -3.3586671352386475, "logits/rejected": -3.0453810691833496, "logps/chosen": -281.5528869628906, "logps/rejected": -233.470458984375, "loss": 0.21, "rewards/accuracies": 1.0, "rewards/chosen": 0.18034525215625763, "rewards/margins": 1.91818368434906, "rewards/rejected": -1.7378385066986084, "step": 6766 }, { "epoch": 0.78, "learning_rate": 6.696710757345194e-08, "logits/chosen": -2.9963741302490234, "logits/rejected": -3.0809192657470703, "logps/chosen": -325.0373229980469, "logps/rejected": -338.1695556640625, "loss": 0.3675, "rewards/accuracies": 0.875, "rewards/chosen": 0.08370541781187057, "rewards/margins": 1.7142030000686646, "rewards/rejected": -1.6304974555969238, "step": 6767 }, { "epoch": 0.78, "learning_rate": 6.69319911038277e-08, "logits/chosen": -3.9133057594299316, "logits/rejected": -4.145673751831055, "logps/chosen": -141.12066650390625, "logps/rejected": -197.3154296875, "loss": 0.2254, "rewards/accuracies": 0.75, "rewards/chosen": -0.10973822325468063, "rewards/margins": 2.369058847427368, "rewards/rejected": -2.47879695892334, "step": 6768 }, { "epoch": 0.78, "learning_rate": 6.689687463420344e-08, "logits/chosen": -3.24739933013916, "logits/rejected": -2.9996776580810547, "logps/chosen": -219.65505981445312, "logps/rejected": -140.8477325439453, "loss": 0.3351, "rewards/accuracies": 0.875, "rewards/chosen": 0.08886474370956421, "rewards/margins": 1.2973827123641968, "rewards/rejected": -1.2085179090499878, "step": 6769 }, { "epoch": 0.78, "learning_rate": 6.686175816457918e-08, "logits/chosen": -3.363778591156006, "logits/rejected": -2.9350457191467285, "logps/chosen": -222.83401489257812, "logps/rejected": -140.0408477783203, "loss": 0.3642, "rewards/accuracies": 0.875, "rewards/chosen": 0.30889570713043213, "rewards/margins": 1.2068887948989868, "rewards/rejected": -0.8979930877685547, "step": 6770 }, { "epoch": 0.78, "learning_rate": 6.682664169495493e-08, "logits/chosen": -3.74440336227417, "logits/rejected": -3.0728302001953125, "logps/chosen": -344.6134338378906, "logps/rejected": -234.85174560546875, "loss": 0.4611, "rewards/accuracies": 0.75, "rewards/chosen": -0.15568304061889648, "rewards/margins": 1.9276459217071533, "rewards/rejected": -2.08332896232605, "step": 6771 }, { "epoch": 0.78, "learning_rate": 6.679152522533067e-08, "logits/chosen": -3.187014102935791, "logits/rejected": -2.7611026763916016, "logps/chosen": -400.0369873046875, "logps/rejected": -366.6341552734375, "loss": 0.248, "rewards/accuracies": 0.875, "rewards/chosen": 0.5380492210388184, "rewards/margins": 2.0029382705688477, "rewards/rejected": -1.4648890495300293, "step": 6772 }, { "epoch": 0.78, "learning_rate": 6.675640875570641e-08, "logits/chosen": -3.7856593132019043, "logits/rejected": -3.3120570182800293, "logps/chosen": -176.43128967285156, "logps/rejected": -233.589599609375, "loss": 0.2833, "rewards/accuracies": 0.875, "rewards/chosen": -0.28247469663619995, "rewards/margins": 2.0179269313812256, "rewards/rejected": -2.300401449203491, "step": 6773 }, { "epoch": 0.78, "learning_rate": 6.672129228608217e-08, "logits/chosen": -2.836500406265259, "logits/rejected": -2.7420554161071777, "logps/chosen": -241.29519653320312, "logps/rejected": -273.9883117675781, "loss": 0.2079, "rewards/accuracies": 1.0, "rewards/chosen": 0.23826636373996735, "rewards/margins": 2.014457941055298, "rewards/rejected": -1.7761915922164917, "step": 6774 }, { "epoch": 0.78, "learning_rate": 6.668617581645791e-08, "logits/chosen": -2.2259774208068848, "logits/rejected": -2.2946832180023193, "logps/chosen": -295.7873229980469, "logps/rejected": -362.86376953125, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": -0.06256968528032303, "rewards/margins": 1.7125999927520752, "rewards/rejected": -1.7751696109771729, "step": 6775 }, { "epoch": 0.78, "learning_rate": 6.665105934683366e-08, "logits/chosen": -2.91729736328125, "logits/rejected": -3.104247570037842, "logps/chosen": -243.43309020996094, "logps/rejected": -226.74729919433594, "loss": 0.4557, "rewards/accuracies": 0.75, "rewards/chosen": 0.028580009937286377, "rewards/margins": 1.3193166255950928, "rewards/rejected": -1.2907365560531616, "step": 6776 }, { "epoch": 0.78, "learning_rate": 6.66159428772094e-08, "logits/chosen": -2.8264846801757812, "logits/rejected": -2.886767864227295, "logps/chosen": -249.47784423828125, "logps/rejected": -345.46807861328125, "loss": 0.1872, "rewards/accuracies": 0.875, "rewards/chosen": 0.2977813482284546, "rewards/margins": 2.637699842453003, "rewards/rejected": -2.339918613433838, "step": 6777 }, { "epoch": 0.78, "learning_rate": 6.658082640758516e-08, "logits/chosen": -3.1995604038238525, "logits/rejected": -3.302152633666992, "logps/chosen": -242.20147705078125, "logps/rejected": -239.53463745117188, "loss": 0.3446, "rewards/accuracies": 0.75, "rewards/chosen": 0.4255322515964508, "rewards/margins": 2.058684825897217, "rewards/rejected": -1.633152723312378, "step": 6778 }, { "epoch": 0.78, "learning_rate": 6.65457099379609e-08, "logits/chosen": -3.6889188289642334, "logits/rejected": -3.450836181640625, "logps/chosen": -315.62506103515625, "logps/rejected": -386.10528564453125, "loss": 0.145, "rewards/accuracies": 1.0, "rewards/chosen": -0.4720994234085083, "rewards/margins": 2.7224912643432617, "rewards/rejected": -3.1945910453796387, "step": 6779 }, { "epoch": 0.78, "learning_rate": 6.651059346833665e-08, "logits/chosen": -3.357210874557495, "logits/rejected": -3.532439708709717, "logps/chosen": -130.021240234375, "logps/rejected": -150.75794982910156, "loss": 0.3988, "rewards/accuracies": 0.875, "rewards/chosen": -0.1779811531305313, "rewards/margins": 1.5697866678237915, "rewards/rejected": -1.7477679252624512, "step": 6780 }, { "epoch": 0.78, "learning_rate": 6.647547699871239e-08, "logits/chosen": -2.7669804096221924, "logits/rejected": -2.6656670570373535, "logps/chosen": -343.6640625, "logps/rejected": -286.4060974121094, "loss": 0.2533, "rewards/accuracies": 1.0, "rewards/chosen": 0.322679340839386, "rewards/margins": 1.9554792642593384, "rewards/rejected": -1.6328001022338867, "step": 6781 }, { "epoch": 0.78, "learning_rate": 6.644036052908815e-08, "logits/chosen": -3.6595458984375, "logits/rejected": -3.6741576194763184, "logps/chosen": -319.9183349609375, "logps/rejected": -178.3214111328125, "loss": 0.3645, "rewards/accuracies": 0.75, "rewards/chosen": -0.021932169795036316, "rewards/margins": 1.3726439476013184, "rewards/rejected": -1.394576072692871, "step": 6782 }, { "epoch": 0.78, "learning_rate": 6.640524405946389e-08, "logits/chosen": -2.7663049697875977, "logits/rejected": -2.773707866668701, "logps/chosen": -298.0637512207031, "logps/rejected": -331.1943054199219, "loss": 0.6273, "rewards/accuracies": 0.75, "rewards/chosen": -0.21168047189712524, "rewards/margins": 1.6439831256866455, "rewards/rejected": -1.8556636571884155, "step": 6783 }, { "epoch": 0.78, "learning_rate": 6.637012758983963e-08, "logits/chosen": -2.413454532623291, "logits/rejected": -2.4851861000061035, "logps/chosen": -313.65740966796875, "logps/rejected": -325.2154846191406, "loss": 0.744, "rewards/accuracies": 0.5, "rewards/chosen": -0.6182162165641785, "rewards/margins": 0.6682770848274231, "rewards/rejected": -1.2864933013916016, "step": 6784 }, { "epoch": 0.78, "learning_rate": 6.633501112021538e-08, "logits/chosen": -2.8339695930480957, "logits/rejected": -2.8127851486206055, "logps/chosen": -255.8428955078125, "logps/rejected": -264.88763427734375, "loss": 0.3299, "rewards/accuracies": 0.875, "rewards/chosen": 0.5841181874275208, "rewards/margins": 1.8814163208007812, "rewards/rejected": -1.2972981929779053, "step": 6785 }, { "epoch": 0.78, "learning_rate": 6.629989465059112e-08, "logits/chosen": -3.440809965133667, "logits/rejected": -3.3545117378234863, "logps/chosen": -276.4635009765625, "logps/rejected": -277.1734313964844, "loss": 0.8527, "rewards/accuracies": 0.625, "rewards/chosen": -1.1395409107208252, "rewards/margins": -0.1582939624786377, "rewards/rejected": -0.981246829032898, "step": 6786 }, { "epoch": 0.78, "learning_rate": 6.626477818096686e-08, "logits/chosen": -2.8321609497070312, "logits/rejected": -2.9400930404663086, "logps/chosen": -277.513427734375, "logps/rejected": -272.1589050292969, "loss": 0.3343, "rewards/accuracies": 0.75, "rewards/chosen": 0.26306143403053284, "rewards/margins": 2.845707416534424, "rewards/rejected": -2.582645893096924, "step": 6787 }, { "epoch": 0.78, "learning_rate": 6.622966171134262e-08, "logits/chosen": -3.9973134994506836, "logits/rejected": -3.818408966064453, "logps/chosen": -282.3304138183594, "logps/rejected": -231.53199768066406, "loss": 0.3774, "rewards/accuracies": 0.75, "rewards/chosen": -0.5747742056846619, "rewards/margins": 1.8720349073410034, "rewards/rejected": -2.4468092918395996, "step": 6788 }, { "epoch": 0.78, "learning_rate": 6.619454524171836e-08, "logits/chosen": -3.307832717895508, "logits/rejected": -3.1887705326080322, "logps/chosen": -175.63357543945312, "logps/rejected": -128.66934204101562, "loss": 0.5811, "rewards/accuracies": 0.625, "rewards/chosen": -0.8687562346458435, "rewards/margins": 0.5754307508468628, "rewards/rejected": -1.444187045097351, "step": 6789 }, { "epoch": 0.78, "learning_rate": 6.615942877209411e-08, "logits/chosen": -3.338956594467163, "logits/rejected": -3.8372104167938232, "logps/chosen": -232.51220703125, "logps/rejected": -250.4418182373047, "loss": 0.3114, "rewards/accuracies": 0.75, "rewards/chosen": 0.045563653111457825, "rewards/margins": 2.0462989807128906, "rewards/rejected": -2.000735282897949, "step": 6790 }, { "epoch": 0.78, "learning_rate": 6.612431230246985e-08, "logits/chosen": -3.0698294639587402, "logits/rejected": -3.0826964378356934, "logps/chosen": -267.5284423828125, "logps/rejected": -252.05587768554688, "loss": 0.598, "rewards/accuracies": 0.75, "rewards/chosen": -0.6337540149688721, "rewards/margins": 1.0007356405258179, "rewards/rejected": -1.6344897747039795, "step": 6791 }, { "epoch": 0.78, "learning_rate": 6.60891958328456e-08, "logits/chosen": -3.7292609214782715, "logits/rejected": -3.66865873336792, "logps/chosen": -259.9373779296875, "logps/rejected": -200.75469970703125, "loss": 0.4879, "rewards/accuracies": 0.875, "rewards/chosen": -0.01669202744960785, "rewards/margins": 2.5636091232299805, "rewards/rejected": -2.580300807952881, "step": 6792 }, { "epoch": 0.78, "learning_rate": 6.605407936322135e-08, "logits/chosen": -2.5751566886901855, "logits/rejected": -2.7059214115142822, "logps/chosen": -278.82928466796875, "logps/rejected": -324.973876953125, "loss": 0.1452, "rewards/accuracies": 1.0, "rewards/chosen": 0.011072635650634766, "rewards/margins": 3.033458948135376, "rewards/rejected": -3.022386312484741, "step": 6793 }, { "epoch": 0.78, "learning_rate": 6.60189628935971e-08, "logits/chosen": -2.929518699645996, "logits/rejected": -3.1311535835266113, "logps/chosen": -176.34332275390625, "logps/rejected": -283.4391174316406, "loss": 0.4829, "rewards/accuracies": 0.75, "rewards/chosen": -0.5106788873672485, "rewards/margins": 1.731013536453247, "rewards/rejected": -2.241692304611206, "step": 6794 }, { "epoch": 0.78, "learning_rate": 6.598384642397284e-08, "logits/chosen": -2.2326102256774902, "logits/rejected": -2.2284975051879883, "logps/chosen": -493.1743469238281, "logps/rejected": -302.8863220214844, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": -0.02053987979888916, "rewards/margins": 2.1247901916503906, "rewards/rejected": -2.1453301906585693, "step": 6795 }, { "epoch": 0.78, "learning_rate": 6.59487299543486e-08, "logits/chosen": -3.254190444946289, "logits/rejected": -3.0437073707580566, "logps/chosen": -251.1642608642578, "logps/rejected": -206.36558532714844, "loss": 0.5846, "rewards/accuracies": 0.75, "rewards/chosen": -0.20217253267765045, "rewards/margins": 1.5759482383728027, "rewards/rejected": -1.778120756149292, "step": 6796 }, { "epoch": 0.78, "learning_rate": 6.591361348472433e-08, "logits/chosen": -3.623806953430176, "logits/rejected": -3.25968861579895, "logps/chosen": -213.21212768554688, "logps/rejected": -194.13119506835938, "loss": 0.3354, "rewards/accuracies": 1.0, "rewards/chosen": 0.3657728433609009, "rewards/margins": 1.3045611381530762, "rewards/rejected": -0.9387881755828857, "step": 6797 }, { "epoch": 0.78, "learning_rate": 6.587849701510009e-08, "logits/chosen": -2.8545451164245605, "logits/rejected": -2.9689009189605713, "logps/chosen": -428.3218994140625, "logps/rejected": -402.47186279296875, "loss": 0.1716, "rewards/accuracies": 1.0, "rewards/chosen": -0.11881456524133682, "rewards/margins": 2.7766900062561035, "rewards/rejected": -2.8955044746398926, "step": 6798 }, { "epoch": 0.78, "learning_rate": 6.584338054547583e-08, "logits/chosen": -3.151240348815918, "logits/rejected": -3.191244125366211, "logps/chosen": -248.50411987304688, "logps/rejected": -275.0198059082031, "loss": 0.3302, "rewards/accuracies": 0.875, "rewards/chosen": 0.07250522077083588, "rewards/margins": 2.02418851852417, "rewards/rejected": -1.9516834020614624, "step": 6799 }, { "epoch": 0.78, "learning_rate": 6.580826407585157e-08, "logits/chosen": -2.534738063812256, "logits/rejected": -2.695298194885254, "logps/chosen": -344.35687255859375, "logps/rejected": -233.5052490234375, "loss": 0.3189, "rewards/accuracies": 0.75, "rewards/chosen": 0.15096017718315125, "rewards/margins": 1.6633756160736084, "rewards/rejected": -1.5124156475067139, "step": 6800 }, { "epoch": 0.78, "learning_rate": 6.577314760622731e-08, "logits/chosen": -3.6435608863830566, "logits/rejected": -3.618330717086792, "logps/chosen": -372.45458984375, "logps/rejected": -424.6346435546875, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": -0.36903122067451477, "rewards/margins": 1.4902716875076294, "rewards/rejected": -1.8593029975891113, "step": 6801 }, { "epoch": 0.78, "learning_rate": 6.573803113660306e-08, "logits/chosen": -3.243924617767334, "logits/rejected": -3.005852699279785, "logps/chosen": -290.582275390625, "logps/rejected": -231.67242431640625, "loss": 0.3837, "rewards/accuracies": 0.75, "rewards/chosen": -0.26784196496009827, "rewards/margins": 1.3116226196289062, "rewards/rejected": -1.5794646739959717, "step": 6802 }, { "epoch": 0.78, "learning_rate": 6.57029146669788e-08, "logits/chosen": -3.0750694274902344, "logits/rejected": -3.0231032371520996, "logps/chosen": -265.1117858886719, "logps/rejected": -264.3658447265625, "loss": 0.2899, "rewards/accuracies": 1.0, "rewards/chosen": -0.07153302431106567, "rewards/margins": 2.1334519386291504, "rewards/rejected": -2.2049849033355713, "step": 6803 }, { "epoch": 0.78, "learning_rate": 6.566779819735456e-08, "logits/chosen": -3.1575279235839844, "logits/rejected": -3.2016336917877197, "logps/chosen": -188.55160522460938, "logps/rejected": -404.25933837890625, "loss": 0.39, "rewards/accuracies": 0.875, "rewards/chosen": -0.49122166633605957, "rewards/margins": 2.0866000652313232, "rewards/rejected": -2.577822208404541, "step": 6804 }, { "epoch": 0.78, "learning_rate": 6.56326817277303e-08, "logits/chosen": -2.8930015563964844, "logits/rejected": -2.9958088397979736, "logps/chosen": -224.76318359375, "logps/rejected": -237.51742553710938, "loss": 0.3313, "rewards/accuracies": 0.875, "rewards/chosen": 0.5508943796157837, "rewards/margins": 1.6808643341064453, "rewards/rejected": -1.1299699544906616, "step": 6805 }, { "epoch": 0.78, "learning_rate": 6.559756525810605e-08, "logits/chosen": -3.676809549331665, "logits/rejected": -3.416433095932007, "logps/chosen": -290.9303894042969, "logps/rejected": -263.67584228515625, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": -0.29353535175323486, "rewards/margins": 3.2508864402770996, "rewards/rejected": -3.544421911239624, "step": 6806 }, { "epoch": 0.78, "learning_rate": 6.55624487884818e-08, "logits/chosen": -3.5082030296325684, "logits/rejected": -3.3803114891052246, "logps/chosen": -291.1971435546875, "logps/rejected": -139.90341186523438, "loss": 0.3721, "rewards/accuracies": 0.875, "rewards/chosen": -0.008053764700889587, "rewards/margins": 1.2492146492004395, "rewards/rejected": -1.2572684288024902, "step": 6807 }, { "epoch": 0.78, "learning_rate": 6.552733231885755e-08, "logits/chosen": -3.324326515197754, "logits/rejected": -3.219181776046753, "logps/chosen": -137.62664794921875, "logps/rejected": -160.95101928710938, "loss": 0.3778, "rewards/accuracies": 0.75, "rewards/chosen": -0.22813719511032104, "rewards/margins": 1.5762128829956055, "rewards/rejected": -1.8043501377105713, "step": 6808 }, { "epoch": 0.78, "learning_rate": 6.549221584923329e-08, "logits/chosen": -3.5332770347595215, "logits/rejected": -3.6086230278015137, "logps/chosen": -145.14193725585938, "logps/rejected": -185.52456665039062, "loss": 0.6059, "rewards/accuracies": 0.625, "rewards/chosen": -0.7385649085044861, "rewards/margins": 1.2557134628295898, "rewards/rejected": -1.9942783117294312, "step": 6809 }, { "epoch": 0.79, "learning_rate": 6.545709937960904e-08, "logits/chosen": -2.8462533950805664, "logits/rejected": -2.814697027206421, "logps/chosen": -200.31187438964844, "logps/rejected": -177.14085388183594, "loss": 0.29, "rewards/accuracies": 0.75, "rewards/chosen": 0.22135767340660095, "rewards/margins": 2.3665382862091064, "rewards/rejected": -2.1451804637908936, "step": 6810 }, { "epoch": 0.79, "learning_rate": 6.542198290998478e-08, "logits/chosen": -4.075146198272705, "logits/rejected": -3.4060144424438477, "logps/chosen": -462.3502197265625, "logps/rejected": -235.717041015625, "loss": 0.6143, "rewards/accuracies": 0.625, "rewards/chosen": -1.1132855415344238, "rewards/margins": 1.1087597608566284, "rewards/rejected": -2.222045421600342, "step": 6811 }, { "epoch": 0.79, "learning_rate": 6.538686644036054e-08, "logits/chosen": -2.4754040241241455, "logits/rejected": -2.546112298965454, "logps/chosen": -123.22860717773438, "logps/rejected": -258.0232238769531, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": -0.18078333139419556, "rewards/margins": 2.830596923828125, "rewards/rejected": -3.011380195617676, "step": 6812 }, { "epoch": 0.79, "learning_rate": 6.535174997073628e-08, "logits/chosen": -2.5822649002075195, "logits/rejected": -2.540574550628662, "logps/chosen": -429.48834228515625, "logps/rejected": -270.4426574707031, "loss": 0.5582, "rewards/accuracies": 0.875, "rewards/chosen": 0.24676388502120972, "rewards/margins": 0.573560893535614, "rewards/rejected": -0.3267970085144043, "step": 6813 }, { "epoch": 0.79, "learning_rate": 6.531663350111202e-08, "logits/chosen": -3.6841418743133545, "logits/rejected": -3.4845662117004395, "logps/chosen": -405.1519775390625, "logps/rejected": -234.80450439453125, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": -0.4059818387031555, "rewards/margins": 1.407465934753418, "rewards/rejected": -1.8134475946426392, "step": 6814 }, { "epoch": 0.79, "learning_rate": 6.528151703148777e-08, "logits/chosen": -2.416717290878296, "logits/rejected": -2.509876251220703, "logps/chosen": -348.9311218261719, "logps/rejected": -206.58132934570312, "loss": 0.6232, "rewards/accuracies": 0.75, "rewards/chosen": -0.3263876736164093, "rewards/margins": 0.6023266315460205, "rewards/rejected": -0.9287142753601074, "step": 6815 }, { "epoch": 0.79, "learning_rate": 6.524640056186351e-08, "logits/chosen": -3.0835540294647217, "logits/rejected": -3.078287124633789, "logps/chosen": -410.3081359863281, "logps/rejected": -322.372802734375, "loss": 0.144, "rewards/accuracies": 1.0, "rewards/chosen": 0.44953134655952454, "rewards/margins": 4.033242225646973, "rewards/rejected": -3.5837106704711914, "step": 6816 }, { "epoch": 0.79, "learning_rate": 6.521128409223925e-08, "logits/chosen": -3.110285758972168, "logits/rejected": -2.9107210636138916, "logps/chosen": -252.9046173095703, "logps/rejected": -179.50257873535156, "loss": 0.4523, "rewards/accuracies": 0.75, "rewards/chosen": -0.26467981934547424, "rewards/margins": 1.929786205291748, "rewards/rejected": -2.1944661140441895, "step": 6817 }, { "epoch": 0.79, "learning_rate": 6.5176167622615e-08, "logits/chosen": -3.3176422119140625, "logits/rejected": -3.4946208000183105, "logps/chosen": -288.4944763183594, "logps/rejected": -193.10409545898438, "loss": 0.6439, "rewards/accuracies": 0.625, "rewards/chosen": -0.23782888054847717, "rewards/margins": 1.6781200170516968, "rewards/rejected": -1.9159488677978516, "step": 6818 }, { "epoch": 0.79, "learning_rate": 6.514105115299075e-08, "logits/chosen": -3.3733420372009277, "logits/rejected": -3.5341546535491943, "logps/chosen": -280.0050048828125, "logps/rejected": -271.9837951660156, "loss": 0.2155, "rewards/accuracies": 1.0, "rewards/chosen": 0.07101286947727203, "rewards/margins": 1.7053579092025757, "rewards/rejected": -1.6343448162078857, "step": 6819 }, { "epoch": 0.79, "learning_rate": 6.510593468336649e-08, "logits/chosen": -3.0949971675872803, "logits/rejected": -2.9136245250701904, "logps/chosen": -252.19403076171875, "logps/rejected": -139.9955291748047, "loss": 0.3175, "rewards/accuracies": 1.0, "rewards/chosen": -0.005233511328697205, "rewards/margins": 1.298378586769104, "rewards/rejected": -1.3036121129989624, "step": 6820 }, { "epoch": 0.79, "learning_rate": 6.507081821374224e-08, "logits/chosen": -2.731044054031372, "logits/rejected": -3.0995235443115234, "logps/chosen": -289.49560546875, "logps/rejected": -297.180908203125, "loss": 0.4163, "rewards/accuracies": 0.75, "rewards/chosen": -0.21933630108833313, "rewards/margins": 1.7601103782653809, "rewards/rejected": -1.9794467687606812, "step": 6821 }, { "epoch": 0.79, "learning_rate": 6.503570174411798e-08, "logits/chosen": -2.9692575931549072, "logits/rejected": -3.4178466796875, "logps/chosen": -204.48736572265625, "logps/rejected": -250.20037841796875, "loss": 0.394, "rewards/accuracies": 0.75, "rewards/chosen": -0.24500279128551483, "rewards/margins": 1.3969091176986694, "rewards/rejected": -1.6419118642807007, "step": 6822 }, { "epoch": 0.79, "learning_rate": 6.500058527449374e-08, "logits/chosen": -3.520697593688965, "logits/rejected": -3.402129650115967, "logps/chosen": -382.16400146484375, "logps/rejected": -261.93499755859375, "loss": 0.379, "rewards/accuracies": 0.875, "rewards/chosen": 0.13567866384983063, "rewards/margins": 1.9754241704940796, "rewards/rejected": -1.8397455215454102, "step": 6823 }, { "epoch": 0.79, "learning_rate": 6.496546880486948e-08, "logits/chosen": -2.646972179412842, "logits/rejected": -2.864185333251953, "logps/chosen": -104.6274185180664, "logps/rejected": -171.93748474121094, "loss": 0.6351, "rewards/accuracies": 0.625, "rewards/chosen": -0.5043372511863708, "rewards/margins": 0.32665354013442993, "rewards/rejected": -0.8309907913208008, "step": 6824 }, { "epoch": 0.79, "learning_rate": 6.493035233524523e-08, "logits/chosen": -3.1588165760040283, "logits/rejected": -3.0602598190307617, "logps/chosen": -315.955810546875, "logps/rejected": -276.62884521484375, "loss": 0.2363, "rewards/accuracies": 0.875, "rewards/chosen": -0.004336923360824585, "rewards/margins": 2.298098564147949, "rewards/rejected": -2.3024356365203857, "step": 6825 }, { "epoch": 0.79, "learning_rate": 6.489523586562097e-08, "logits/chosen": -3.055269718170166, "logits/rejected": -2.679443597793579, "logps/chosen": -233.97152709960938, "logps/rejected": -209.08705139160156, "loss": 0.3441, "rewards/accuracies": 0.875, "rewards/chosen": -0.025846242904663086, "rewards/margins": 1.4674662351608276, "rewards/rejected": -1.4933124780654907, "step": 6826 }, { "epoch": 0.79, "learning_rate": 6.486011939599673e-08, "logits/chosen": -3.4356706142425537, "logits/rejected": -3.129436492919922, "logps/chosen": -291.881591796875, "logps/rejected": -176.69021606445312, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": -0.3934863805770874, "rewards/margins": 2.326484203338623, "rewards/rejected": -2.719970464706421, "step": 6827 }, { "epoch": 0.79, "learning_rate": 6.482500292637247e-08, "logits/chosen": -2.8660202026367188, "logits/rejected": -3.2505035400390625, "logps/chosen": -141.67660522460938, "logps/rejected": -170.54150390625, "loss": 0.9362, "rewards/accuracies": 0.5, "rewards/chosen": -0.7895552515983582, "rewards/margins": 0.45149314403533936, "rewards/rejected": -1.2410484552383423, "step": 6828 }, { "epoch": 0.79, "learning_rate": 6.478988645674822e-08, "logits/chosen": -2.951704263687134, "logits/rejected": -2.724363088607788, "logps/chosen": -218.65257263183594, "logps/rejected": -338.906005859375, "loss": 0.181, "rewards/accuracies": 1.0, "rewards/chosen": -0.11450247466564178, "rewards/margins": 2.7068371772766113, "rewards/rejected": -2.8213396072387695, "step": 6829 }, { "epoch": 0.79, "learning_rate": 6.475476998712396e-08, "logits/chosen": -2.655768632888794, "logits/rejected": -2.5083253383636475, "logps/chosen": -318.69573974609375, "logps/rejected": -212.20181274414062, "loss": 0.6272, "rewards/accuracies": 0.625, "rewards/chosen": -0.14670154452323914, "rewards/margins": 0.700103759765625, "rewards/rejected": -0.8468053340911865, "step": 6830 }, { "epoch": 0.79, "learning_rate": 6.47196535174997e-08, "logits/chosen": -3.526679039001465, "logits/rejected": -3.421184778213501, "logps/chosen": -184.98226928710938, "logps/rejected": -212.30490112304688, "loss": 0.2182, "rewards/accuracies": 0.875, "rewards/chosen": 0.2574253976345062, "rewards/margins": 2.6191964149475098, "rewards/rejected": -2.3617711067199707, "step": 6831 }, { "epoch": 0.79, "learning_rate": 6.468453704787545e-08, "logits/chosen": -3.065885066986084, "logits/rejected": -3.1498255729675293, "logps/chosen": -160.78990173339844, "logps/rejected": -182.32052612304688, "loss": 0.2004, "rewards/accuracies": 1.0, "rewards/chosen": 0.9904659986495972, "rewards/margins": 2.6232385635375977, "rewards/rejected": -1.6327725648880005, "step": 6832 }, { "epoch": 0.79, "learning_rate": 6.46494205782512e-08, "logits/chosen": -3.322476625442505, "logits/rejected": -3.3201043605804443, "logps/chosen": -263.06976318359375, "logps/rejected": -278.8676452636719, "loss": 0.2113, "rewards/accuracies": 1.0, "rewards/chosen": 0.1049315333366394, "rewards/margins": 2.6032118797302246, "rewards/rejected": -2.4982802867889404, "step": 6833 }, { "epoch": 0.79, "learning_rate": 6.461430410862694e-08, "logits/chosen": -3.4816198348999023, "logits/rejected": -3.0614264011383057, "logps/chosen": -537.0972900390625, "logps/rejected": -357.44903564453125, "loss": 0.2455, "rewards/accuracies": 1.0, "rewards/chosen": -0.09117260575294495, "rewards/margins": 1.797135353088379, "rewards/rejected": -1.8883081674575806, "step": 6834 }, { "epoch": 0.79, "learning_rate": 6.457918763900269e-08, "logits/chosen": -3.2478866577148438, "logits/rejected": -3.455711841583252, "logps/chosen": -265.21160888671875, "logps/rejected": -258.420654296875, "loss": 0.2991, "rewards/accuracies": 0.875, "rewards/chosen": -0.28122416138648987, "rewards/margins": 1.3312146663665771, "rewards/rejected": -1.6124387979507446, "step": 6835 }, { "epoch": 0.79, "learning_rate": 6.454407116937843e-08, "logits/chosen": -2.487886905670166, "logits/rejected": -2.1274523735046387, "logps/chosen": -198.15780639648438, "logps/rejected": -287.2400207519531, "loss": 0.4461, "rewards/accuracies": 0.75, "rewards/chosen": 0.22394424676895142, "rewards/margins": 1.759330153465271, "rewards/rejected": -1.5353859663009644, "step": 6836 }, { "epoch": 0.79, "learning_rate": 6.450895469975418e-08, "logits/chosen": -3.334784507751465, "logits/rejected": -3.4259417057037354, "logps/chosen": -205.77072143554688, "logps/rejected": -342.7296142578125, "loss": 0.4599, "rewards/accuracies": 0.75, "rewards/chosen": 0.22416365146636963, "rewards/margins": 1.3657113313674927, "rewards/rejected": -1.141547679901123, "step": 6837 }, { "epoch": 0.79, "learning_rate": 6.447383823012992e-08, "logits/chosen": -2.63106632232666, "logits/rejected": -2.457960605621338, "logps/chosen": -315.22119140625, "logps/rejected": -244.2825927734375, "loss": 0.4086, "rewards/accuracies": 0.75, "rewards/chosen": 0.23190326988697052, "rewards/margins": 1.2602914571762085, "rewards/rejected": -1.0283881425857544, "step": 6838 }, { "epoch": 0.79, "learning_rate": 6.443872176050568e-08, "logits/chosen": -3.7818500995635986, "logits/rejected": -3.5495409965515137, "logps/chosen": -322.14459228515625, "logps/rejected": -368.0477294921875, "loss": 0.2012, "rewards/accuracies": 1.0, "rewards/chosen": 0.2929685115814209, "rewards/margins": 3.0361342430114746, "rewards/rejected": -2.7431657314300537, "step": 6839 }, { "epoch": 0.79, "learning_rate": 6.440360529088142e-08, "logits/chosen": -3.5276098251342773, "logits/rejected": -3.145176649093628, "logps/chosen": -187.4014434814453, "logps/rejected": -226.14730834960938, "loss": 0.364, "rewards/accuracies": 0.75, "rewards/chosen": -0.1325991451740265, "rewards/margins": 1.8107569217681885, "rewards/rejected": -1.9433560371398926, "step": 6840 }, { "epoch": 0.79, "learning_rate": 6.436848882125717e-08, "logits/chosen": -3.291114330291748, "logits/rejected": -3.165273427963257, "logps/chosen": -313.86248779296875, "logps/rejected": -209.34783935546875, "loss": 0.3262, "rewards/accuracies": 0.875, "rewards/chosen": -0.6855396032333374, "rewards/margins": 1.6952064037322998, "rewards/rejected": -2.3807458877563477, "step": 6841 }, { "epoch": 0.79, "learning_rate": 6.433337235163291e-08, "logits/chosen": -2.854081630706787, "logits/rejected": -3.0671939849853516, "logps/chosen": -362.5175476074219, "logps/rejected": -205.03860473632812, "loss": 0.3861, "rewards/accuracies": 0.875, "rewards/chosen": -0.19321422278881073, "rewards/margins": 1.5972611904144287, "rewards/rejected": -1.7904754877090454, "step": 6842 }, { "epoch": 0.79, "learning_rate": 6.429825588200867e-08, "logits/chosen": -2.711979389190674, "logits/rejected": -2.952932119369507, "logps/chosen": -265.2267761230469, "logps/rejected": -213.13897705078125, "loss": 0.4093, "rewards/accuracies": 0.75, "rewards/chosen": -0.7980820536613464, "rewards/margins": 1.2786426544189453, "rewards/rejected": -2.0767245292663574, "step": 6843 }, { "epoch": 0.79, "learning_rate": 6.426313941238441e-08, "logits/chosen": -3.6388802528381348, "logits/rejected": -3.4347779750823975, "logps/chosen": -252.03073120117188, "logps/rejected": -196.06954956054688, "loss": 0.6733, "rewards/accuracies": 0.75, "rewards/chosen": -0.17113324999809265, "rewards/margins": 0.9060022830963135, "rewards/rejected": -1.0771355628967285, "step": 6844 }, { "epoch": 0.79, "learning_rate": 6.422802294276015e-08, "logits/chosen": -3.3597936630249023, "logits/rejected": -3.6872520446777344, "logps/chosen": -192.5332794189453, "logps/rejected": -356.21484375, "loss": 0.6125, "rewards/accuracies": 0.75, "rewards/chosen": -0.03905147314071655, "rewards/margins": 1.770747184753418, "rewards/rejected": -1.8097987174987793, "step": 6845 }, { "epoch": 0.79, "learning_rate": 6.41929064731359e-08, "logits/chosen": -3.152092456817627, "logits/rejected": -2.998012065887451, "logps/chosen": -268.07440185546875, "logps/rejected": -295.1825866699219, "loss": 0.2639, "rewards/accuracies": 0.875, "rewards/chosen": -0.06760285794734955, "rewards/margins": 2.1348395347595215, "rewards/rejected": -2.2024424076080322, "step": 6846 }, { "epoch": 0.79, "learning_rate": 6.415779000351164e-08, "logits/chosen": -3.4751336574554443, "logits/rejected": -3.2016994953155518, "logps/chosen": -331.830322265625, "logps/rejected": -226.12942504882812, "loss": 0.3918, "rewards/accuracies": 0.875, "rewards/chosen": -0.40196648240089417, "rewards/margins": 1.5671952962875366, "rewards/rejected": -1.9691617488861084, "step": 6847 }, { "epoch": 0.79, "learning_rate": 6.412267353388738e-08, "logits/chosen": -3.483635663986206, "logits/rejected": -3.550340414047241, "logps/chosen": -148.66677856445312, "logps/rejected": -149.79293823242188, "loss": 0.4052, "rewards/accuracies": 1.0, "rewards/chosen": 0.020672565326094627, "rewards/margins": 1.3683110475540161, "rewards/rejected": -1.3476386070251465, "step": 6848 }, { "epoch": 0.79, "learning_rate": 6.408755706426314e-08, "logits/chosen": -3.5034255981445312, "logits/rejected": -3.1293697357177734, "logps/chosen": -292.9563293457031, "logps/rejected": -232.19862365722656, "loss": 0.3061, "rewards/accuracies": 0.875, "rewards/chosen": -0.163407564163208, "rewards/margins": 2.2269904613494873, "rewards/rejected": -2.3903980255126953, "step": 6849 }, { "epoch": 0.79, "learning_rate": 6.405244059463888e-08, "logits/chosen": -3.2432289123535156, "logits/rejected": -2.998201847076416, "logps/chosen": -224.025390625, "logps/rejected": -338.45501708984375, "loss": 0.5427, "rewards/accuracies": 0.75, "rewards/chosen": -0.5834694504737854, "rewards/margins": 0.4937437176704407, "rewards/rejected": -1.077213168144226, "step": 6850 }, { "epoch": 0.79, "learning_rate": 6.401732412501463e-08, "logits/chosen": -2.7048275470733643, "logits/rejected": -2.884060859680176, "logps/chosen": -341.81610107421875, "logps/rejected": -250.89706420898438, "loss": 0.3224, "rewards/accuracies": 0.875, "rewards/chosen": -0.32893475890159607, "rewards/margins": 1.8751518726348877, "rewards/rejected": -2.2040867805480957, "step": 6851 }, { "epoch": 0.79, "learning_rate": 6.398220765539037e-08, "logits/chosen": -2.9495837688446045, "logits/rejected": -3.0840003490448, "logps/chosen": -244.6237335205078, "logps/rejected": -247.16070556640625, "loss": 0.3171, "rewards/accuracies": 0.875, "rewards/chosen": 0.23765386641025543, "rewards/margins": 2.435872793197632, "rewards/rejected": -2.198219060897827, "step": 6852 }, { "epoch": 0.79, "learning_rate": 6.394709118576613e-08, "logits/chosen": -2.576324939727783, "logits/rejected": -2.7576746940612793, "logps/chosen": -357.1170349121094, "logps/rejected": -321.25433349609375, "loss": 0.2696, "rewards/accuracies": 0.75, "rewards/chosen": 0.14340072870254517, "rewards/margins": 2.000047445297241, "rewards/rejected": -1.8566467761993408, "step": 6853 }, { "epoch": 0.79, "learning_rate": 6.391197471614187e-08, "logits/chosen": -2.7901506423950195, "logits/rejected": -2.627124309539795, "logps/chosen": -246.06459045410156, "logps/rejected": -317.5487976074219, "loss": 0.4376, "rewards/accuracies": 0.875, "rewards/chosen": -0.11575320363044739, "rewards/margins": 1.1238127946853638, "rewards/rejected": -1.2395659685134888, "step": 6854 }, { "epoch": 0.79, "learning_rate": 6.387685824651762e-08, "logits/chosen": -3.6491472721099854, "logits/rejected": -3.2009377479553223, "logps/chosen": -164.7742919921875, "logps/rejected": -257.3173828125, "loss": 0.1883, "rewards/accuracies": 1.0, "rewards/chosen": 0.15473785996437073, "rewards/margins": 3.5676796436309814, "rewards/rejected": -3.4129416942596436, "step": 6855 }, { "epoch": 0.79, "learning_rate": 6.384174177689336e-08, "logits/chosen": -3.6167948246002197, "logits/rejected": -3.400479316711426, "logps/chosen": -255.48751831054688, "logps/rejected": -237.25625610351562, "loss": 0.2229, "rewards/accuracies": 0.875, "rewards/chosen": 0.7463910579681396, "rewards/margins": 2.4294629096984863, "rewards/rejected": -1.6830718517303467, "step": 6856 }, { "epoch": 0.79, "learning_rate": 6.380662530726912e-08, "logits/chosen": -3.2815089225769043, "logits/rejected": -2.9852240085601807, "logps/chosen": -215.11795043945312, "logps/rejected": -200.4466552734375, "loss": 0.4554, "rewards/accuracies": 0.875, "rewards/chosen": -0.32384806871414185, "rewards/margins": 1.28456711769104, "rewards/rejected": -1.608415126800537, "step": 6857 }, { "epoch": 0.79, "learning_rate": 6.377150883764486e-08, "logits/chosen": -2.9041736125946045, "logits/rejected": -2.953225612640381, "logps/chosen": -364.7045593261719, "logps/rejected": -420.2625732421875, "loss": 0.3503, "rewards/accuracies": 0.75, "rewards/chosen": 0.0018400996923446655, "rewards/margins": 2.2849137783050537, "rewards/rejected": -2.283073663711548, "step": 6858 }, { "epoch": 0.79, "learning_rate": 6.37363923680206e-08, "logits/chosen": -3.1414053440093994, "logits/rejected": -3.195016860961914, "logps/chosen": -169.529052734375, "logps/rejected": -186.9878387451172, "loss": 0.3968, "rewards/accuracies": 0.875, "rewards/chosen": -0.13334645330905914, "rewards/margins": 1.7156486511230469, "rewards/rejected": -1.8489950895309448, "step": 6859 }, { "epoch": 0.79, "learning_rate": 6.370127589839635e-08, "logits/chosen": -2.6622955799102783, "logits/rejected": -2.5270578861236572, "logps/chosen": -284.8570556640625, "logps/rejected": -298.7757568359375, "loss": 0.3865, "rewards/accuracies": 0.75, "rewards/chosen": -0.1261647641658783, "rewards/margins": 1.1796964406967163, "rewards/rejected": -1.3058613538742065, "step": 6860 }, { "epoch": 0.79, "learning_rate": 6.366615942877209e-08, "logits/chosen": -3.5499696731567383, "logits/rejected": -3.4584922790527344, "logps/chosen": -256.90087890625, "logps/rejected": -313.9091491699219, "loss": 0.2595, "rewards/accuracies": 0.875, "rewards/chosen": -0.07758432626724243, "rewards/margins": 2.432985305786133, "rewards/rejected": -2.5105698108673096, "step": 6861 }, { "epoch": 0.79, "learning_rate": 6.363104295914783e-08, "logits/chosen": -2.6109440326690674, "logits/rejected": -2.77958083152771, "logps/chosen": -285.7755126953125, "logps/rejected": -317.174560546875, "loss": 0.4043, "rewards/accuracies": 0.75, "rewards/chosen": -0.7892271876335144, "rewards/margins": 2.113048553466797, "rewards/rejected": -2.902275562286377, "step": 6862 }, { "epoch": 0.79, "learning_rate": 6.359592648952359e-08, "logits/chosen": -3.326241970062256, "logits/rejected": -3.6112136840820312, "logps/chosen": -240.3668212890625, "logps/rejected": -319.6700439453125, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": 0.6217849254608154, "rewards/margins": 2.2391417026519775, "rewards/rejected": -1.617356538772583, "step": 6863 }, { "epoch": 0.79, "learning_rate": 6.356081001989933e-08, "logits/chosen": -2.931922435760498, "logits/rejected": -2.992692470550537, "logps/chosen": -202.0928192138672, "logps/rejected": -172.689208984375, "loss": 0.164, "rewards/accuracies": 1.0, "rewards/chosen": 0.21721456944942474, "rewards/margins": 2.461024284362793, "rewards/rejected": -2.243809700012207, "step": 6864 }, { "epoch": 0.79, "learning_rate": 6.352569355027507e-08, "logits/chosen": -2.940180778503418, "logits/rejected": -2.844433307647705, "logps/chosen": -400.3804931640625, "logps/rejected": -334.21319580078125, "loss": 0.5852, "rewards/accuracies": 0.625, "rewards/chosen": 0.3360590934753418, "rewards/margins": 1.5003061294555664, "rewards/rejected": -1.164246916770935, "step": 6865 }, { "epoch": 0.79, "learning_rate": 6.349057708065082e-08, "logits/chosen": -2.623579502105713, "logits/rejected": -2.619565010070801, "logps/chosen": -155.14590454101562, "logps/rejected": -297.96221923828125, "loss": 0.3125, "rewards/accuracies": 0.75, "rewards/chosen": 0.6103005409240723, "rewards/margins": 2.128880500793457, "rewards/rejected": -1.5185800790786743, "step": 6866 }, { "epoch": 0.79, "learning_rate": 6.345546061102656e-08, "logits/chosen": -3.420469284057617, "logits/rejected": -3.5028417110443115, "logps/chosen": -313.4007263183594, "logps/rejected": -336.6433410644531, "loss": 0.2345, "rewards/accuracies": 1.0, "rewards/chosen": 0.06480172276496887, "rewards/margins": 2.2274088859558105, "rewards/rejected": -2.162606954574585, "step": 6867 }, { "epoch": 0.79, "learning_rate": 6.342034414140232e-08, "logits/chosen": -3.0210304260253906, "logits/rejected": -3.022796869277954, "logps/chosen": -139.5048370361328, "logps/rejected": -221.88027954101562, "loss": 0.2173, "rewards/accuracies": 1.0, "rewards/chosen": -0.10810017585754395, "rewards/margins": 3.3061201572418213, "rewards/rejected": -3.4142203330993652, "step": 6868 }, { "epoch": 0.79, "learning_rate": 6.338522767177806e-08, "logits/chosen": -2.8449249267578125, "logits/rejected": -2.953348159790039, "logps/chosen": -201.3939208984375, "logps/rejected": -243.2868194580078, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": 0.14871792495250702, "rewards/margins": 2.3179965019226074, "rewards/rejected": -2.169278383255005, "step": 6869 }, { "epoch": 0.79, "learning_rate": 6.335011120215381e-08, "logits/chosen": -2.5218968391418457, "logits/rejected": -2.669893503189087, "logps/chosen": -384.6403503417969, "logps/rejected": -268.40850830078125, "loss": 0.2024, "rewards/accuracies": 1.0, "rewards/chosen": 0.5844606757164001, "rewards/margins": 2.635908603668213, "rewards/rejected": -2.051448106765747, "step": 6870 }, { "epoch": 0.79, "learning_rate": 6.331499473252955e-08, "logits/chosen": -3.293840169906616, "logits/rejected": -3.332963466644287, "logps/chosen": -173.6635284423828, "logps/rejected": -100.22616577148438, "loss": 0.5461, "rewards/accuracies": 0.5, "rewards/chosen": -0.044323503971099854, "rewards/margins": 0.6928259134292603, "rewards/rejected": -0.7371494174003601, "step": 6871 }, { "epoch": 0.79, "learning_rate": 6.32798782629053e-08, "logits/chosen": -2.722712516784668, "logits/rejected": -3.0634419918060303, "logps/chosen": -238.21566772460938, "logps/rejected": -293.7730407714844, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.05865603685379028, "rewards/margins": 2.214850664138794, "rewards/rejected": -2.1561944484710693, "step": 6872 }, { "epoch": 0.79, "learning_rate": 6.324476179328105e-08, "logits/chosen": -2.326263427734375, "logits/rejected": -2.5099539756774902, "logps/chosen": -237.26934814453125, "logps/rejected": -272.9694519042969, "loss": 0.2892, "rewards/accuracies": 0.875, "rewards/chosen": 0.09439561516046524, "rewards/margins": 2.2884979248046875, "rewards/rejected": -2.1941025257110596, "step": 6873 }, { "epoch": 0.79, "learning_rate": 6.32096453236568e-08, "logits/chosen": -3.479593515396118, "logits/rejected": -3.4320316314697266, "logps/chosen": -281.2029724121094, "logps/rejected": -246.97161865234375, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": 0.5093991160392761, "rewards/margins": 3.6741886138916016, "rewards/rejected": -3.1647894382476807, "step": 6874 }, { "epoch": 0.79, "learning_rate": 6.317452885403254e-08, "logits/chosen": -3.695598840713501, "logits/rejected": -3.857558250427246, "logps/chosen": -83.91822814941406, "logps/rejected": -229.68289184570312, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": -0.2006939798593521, "rewards/margins": 1.437912940979004, "rewards/rejected": -1.6386069059371948, "step": 6875 }, { "epoch": 0.79, "learning_rate": 6.313941238440828e-08, "logits/chosen": -3.2645087242126465, "logits/rejected": -3.4484758377075195, "logps/chosen": -386.1628112792969, "logps/rejected": -269.38214111328125, "loss": 0.324, "rewards/accuracies": 0.875, "rewards/chosen": 0.2500818073749542, "rewards/margins": 2.142085313796997, "rewards/rejected": -1.8920035362243652, "step": 6876 }, { "epoch": 0.79, "learning_rate": 6.310429591478403e-08, "logits/chosen": -2.999725580215454, "logits/rejected": -3.0121803283691406, "logps/chosen": -245.6441650390625, "logps/rejected": -250.72647094726562, "loss": 0.622, "rewards/accuracies": 0.625, "rewards/chosen": -0.6914530396461487, "rewards/margins": 0.8629512786865234, "rewards/rejected": -1.5544042587280273, "step": 6877 }, { "epoch": 0.79, "learning_rate": 6.306917944515977e-08, "logits/chosen": -3.822054386138916, "logits/rejected": -4.000582695007324, "logps/chosen": -298.1544494628906, "logps/rejected": -362.7718505859375, "loss": 0.4517, "rewards/accuracies": 0.75, "rewards/chosen": -0.3701764941215515, "rewards/margins": 1.3521809577941895, "rewards/rejected": -1.7223577499389648, "step": 6878 }, { "epoch": 0.79, "learning_rate": 6.303406297553552e-08, "logits/chosen": -2.864751100540161, "logits/rejected": -3.339650869369507, "logps/chosen": -184.45489501953125, "logps/rejected": -283.1103820800781, "loss": 0.379, "rewards/accuracies": 0.75, "rewards/chosen": 0.3413684368133545, "rewards/margins": 2.1201016902923584, "rewards/rejected": -1.778733253479004, "step": 6879 }, { "epoch": 0.79, "learning_rate": 6.299894650591127e-08, "logits/chosen": -2.5102121829986572, "logits/rejected": -2.468625545501709, "logps/chosen": -313.90234375, "logps/rejected": -302.5935974121094, "loss": 0.4965, "rewards/accuracies": 0.75, "rewards/chosen": 0.2958912253379822, "rewards/margins": 1.7813442945480347, "rewards/rejected": -1.4854531288146973, "step": 6880 }, { "epoch": 0.79, "learning_rate": 6.296383003628701e-08, "logits/chosen": -2.8440263271331787, "logits/rejected": -2.8773467540740967, "logps/chosen": -390.4270935058594, "logps/rejected": -218.5861358642578, "loss": 0.3455, "rewards/accuracies": 0.875, "rewards/chosen": -0.028527140617370605, "rewards/margins": 1.5096914768218994, "rewards/rejected": -1.5382184982299805, "step": 6881 }, { "epoch": 0.79, "learning_rate": 6.292871356666276e-08, "logits/chosen": -2.917264938354492, "logits/rejected": -3.1138463020324707, "logps/chosen": -405.18316650390625, "logps/rejected": -313.18994140625, "loss": 0.352, "rewards/accuracies": 0.875, "rewards/chosen": 0.22794890403747559, "rewards/margins": 2.0674829483032227, "rewards/rejected": -1.8395342826843262, "step": 6882 }, { "epoch": 0.79, "learning_rate": 6.28935970970385e-08, "logits/chosen": -2.9692158699035645, "logits/rejected": -3.1525657176971436, "logps/chosen": -166.4990234375, "logps/rejected": -141.53643798828125, "loss": 0.3239, "rewards/accuracies": 1.0, "rewards/chosen": 0.14463672041893005, "rewards/margins": 1.2622301578521729, "rewards/rejected": -1.11759352684021, "step": 6883 }, { "epoch": 0.79, "learning_rate": 6.285848062741426e-08, "logits/chosen": -3.0730855464935303, "logits/rejected": -3.3860981464385986, "logps/chosen": -299.76727294921875, "logps/rejected": -227.4004669189453, "loss": 0.4042, "rewards/accuracies": 0.75, "rewards/chosen": -0.3222920596599579, "rewards/margins": 1.438347339630127, "rewards/rejected": -1.7606395483016968, "step": 6884 }, { "epoch": 0.79, "learning_rate": 6.282336415779e-08, "logits/chosen": -1.9104608297348022, "logits/rejected": -1.9323811531066895, "logps/chosen": -243.21144104003906, "logps/rejected": -290.5775451660156, "loss": 0.4571, "rewards/accuracies": 0.75, "rewards/chosen": -0.024943001568317413, "rewards/margins": 0.9101272225379944, "rewards/rejected": -0.935070276260376, "step": 6885 }, { "epoch": 0.79, "learning_rate": 6.278824768816575e-08, "logits/chosen": -2.300570249557495, "logits/rejected": -2.4551186561584473, "logps/chosen": -201.5582275390625, "logps/rejected": -360.60711669921875, "loss": 0.339, "rewards/accuracies": 0.75, "rewards/chosen": -0.3555089831352234, "rewards/margins": 2.3074445724487305, "rewards/rejected": -2.6629533767700195, "step": 6886 }, { "epoch": 0.79, "learning_rate": 6.275313121854149e-08, "logits/chosen": -2.9524686336517334, "logits/rejected": -3.247201919555664, "logps/chosen": -156.8269500732422, "logps/rejected": -273.0232238769531, "loss": 0.2901, "rewards/accuracies": 0.75, "rewards/chosen": -0.08610832691192627, "rewards/margins": 3.798002004623413, "rewards/rejected": -3.88411021232605, "step": 6887 }, { "epoch": 0.79, "learning_rate": 6.271801474891725e-08, "logits/chosen": -2.7733888626098633, "logits/rejected": -2.8369503021240234, "logps/chosen": -333.38275146484375, "logps/rejected": -369.7525634765625, "loss": 0.3303, "rewards/accuracies": 0.875, "rewards/chosen": -0.03532882034778595, "rewards/margins": 1.796326756477356, "rewards/rejected": -1.8316556215286255, "step": 6888 }, { "epoch": 0.79, "learning_rate": 6.268289827929299e-08, "logits/chosen": -3.195446491241455, "logits/rejected": -3.092073917388916, "logps/chosen": -237.06082153320312, "logps/rejected": -227.07858276367188, "loss": 0.331, "rewards/accuracies": 0.75, "rewards/chosen": 0.20463049411773682, "rewards/margins": 1.7048801183700562, "rewards/rejected": -1.5002496242523193, "step": 6889 }, { "epoch": 0.79, "learning_rate": 6.264778180966873e-08, "logits/chosen": -2.7557051181793213, "logits/rejected": -3.221975564956665, "logps/chosen": -137.88192749023438, "logps/rejected": -329.6307678222656, "loss": 0.4012, "rewards/accuracies": 0.875, "rewards/chosen": 0.28547370433807373, "rewards/margins": 1.6646965742111206, "rewards/rejected": -1.3792228698730469, "step": 6890 }, { "epoch": 0.79, "learning_rate": 6.261266534004448e-08, "logits/chosen": -3.069709300994873, "logits/rejected": -2.861220359802246, "logps/chosen": -302.48358154296875, "logps/rejected": -173.47406005859375, "loss": 0.4968, "rewards/accuracies": 0.75, "rewards/chosen": -0.5652791857719421, "rewards/margins": 0.6199121475219727, "rewards/rejected": -1.1851913928985596, "step": 6891 }, { "epoch": 0.79, "learning_rate": 6.257754887042022e-08, "logits/chosen": -3.047985076904297, "logits/rejected": -3.5076189041137695, "logps/chosen": -239.9783935546875, "logps/rejected": -179.6811981201172, "loss": 0.213, "rewards/accuracies": 1.0, "rewards/chosen": 0.08551289141178131, "rewards/margins": 2.653794050216675, "rewards/rejected": -2.5682809352874756, "step": 6892 }, { "epoch": 0.79, "learning_rate": 6.254243240079596e-08, "logits/chosen": -3.0185861587524414, "logits/rejected": -2.981876850128174, "logps/chosen": -132.935546875, "logps/rejected": -149.70928955078125, "loss": 0.3026, "rewards/accuracies": 1.0, "rewards/chosen": -0.33074119687080383, "rewards/margins": 2.257427453994751, "rewards/rejected": -2.5881686210632324, "step": 6893 }, { "epoch": 0.79, "learning_rate": 6.250731593117172e-08, "logits/chosen": -2.7466025352478027, "logits/rejected": -3.099634885787964, "logps/chosen": -374.97882080078125, "logps/rejected": -318.3617858886719, "loss": 0.1656, "rewards/accuracies": 1.0, "rewards/chosen": 0.6082127094268799, "rewards/margins": 2.6734707355499268, "rewards/rejected": -2.0652577877044678, "step": 6894 }, { "epoch": 0.79, "learning_rate": 6.247219946154746e-08, "logits/chosen": -3.1095850467681885, "logits/rejected": -2.889153480529785, "logps/chosen": -264.2078857421875, "logps/rejected": -310.78070068359375, "loss": 0.5093, "rewards/accuracies": 0.75, "rewards/chosen": -0.1999027281999588, "rewards/margins": 2.5612244606018066, "rewards/rejected": -2.761127233505249, "step": 6895 }, { "epoch": 0.79, "learning_rate": 6.243708299192321e-08, "logits/chosen": -2.182610034942627, "logits/rejected": -2.1407625675201416, "logps/chosen": -272.81500244140625, "logps/rejected": -214.58163452148438, "loss": 0.6788, "rewards/accuracies": 0.625, "rewards/chosen": 0.05769185721874237, "rewards/margins": 0.58591628074646, "rewards/rejected": -0.5282244086265564, "step": 6896 }, { "epoch": 0.8, "learning_rate": 6.240196652229895e-08, "logits/chosen": -3.597557306289673, "logits/rejected": -3.5217063426971436, "logps/chosen": -265.26788330078125, "logps/rejected": -181.65621948242188, "loss": 0.3204, "rewards/accuracies": 0.875, "rewards/chosen": 0.1283518373966217, "rewards/margins": 2.2489266395568848, "rewards/rejected": -2.120574951171875, "step": 6897 }, { "epoch": 0.8, "learning_rate": 6.23668500526747e-08, "logits/chosen": -2.474308729171753, "logits/rejected": -2.304273843765259, "logps/chosen": -155.16506958007812, "logps/rejected": -262.0389099121094, "loss": 0.7607, "rewards/accuracies": 0.625, "rewards/chosen": -0.3279498219490051, "rewards/margins": 0.6810014247894287, "rewards/rejected": -1.008951187133789, "step": 6898 }, { "epoch": 0.8, "learning_rate": 6.233173358305045e-08, "logits/chosen": -2.9308013916015625, "logits/rejected": -2.7349352836608887, "logps/chosen": -293.2691955566406, "logps/rejected": -327.2069091796875, "loss": 0.2305, "rewards/accuracies": 0.875, "rewards/chosen": -0.009263008832931519, "rewards/margins": 1.9479718208312988, "rewards/rejected": -1.9572348594665527, "step": 6899 }, { "epoch": 0.8, "learning_rate": 6.22966171134262e-08, "logits/chosen": -3.306802272796631, "logits/rejected": -3.810076951980591, "logps/chosen": -296.97674560546875, "logps/rejected": -372.856689453125, "loss": 0.2529, "rewards/accuracies": 1.0, "rewards/chosen": -0.48467814922332764, "rewards/margins": 2.0994582176208496, "rewards/rejected": -2.584136486053467, "step": 6900 }, { "epoch": 0.8, "learning_rate": 6.226150064380194e-08, "logits/chosen": -2.7982587814331055, "logits/rejected": -3.0903711318969727, "logps/chosen": -273.94537353515625, "logps/rejected": -270.28802490234375, "loss": 0.3282, "rewards/accuracies": 0.875, "rewards/chosen": 0.5703680515289307, "rewards/margins": 2.5379395484924316, "rewards/rejected": -1.967571496963501, "step": 6901 }, { "epoch": 0.8, "learning_rate": 6.22263841741777e-08, "logits/chosen": -3.9200849533081055, "logits/rejected": -4.1158246994018555, "logps/chosen": -100.15008544921875, "logps/rejected": -110.8628921508789, "loss": 0.6877, "rewards/accuracies": 0.75, "rewards/chosen": 0.07611338794231415, "rewards/margins": 1.6607253551483154, "rewards/rejected": -1.5846121311187744, "step": 6902 }, { "epoch": 0.8, "learning_rate": 6.219126770455344e-08, "logits/chosen": -2.787980794906616, "logits/rejected": -2.9087626934051514, "logps/chosen": -195.42503356933594, "logps/rejected": -332.37939453125, "loss": 0.4914, "rewards/accuracies": 0.75, "rewards/chosen": -0.18333151936531067, "rewards/margins": 2.3424930572509766, "rewards/rejected": -2.5258243083953857, "step": 6903 }, { "epoch": 0.8, "learning_rate": 6.215615123492918e-08, "logits/chosen": -2.5948338508605957, "logits/rejected": -2.721376895904541, "logps/chosen": -254.96873474121094, "logps/rejected": -193.92007446289062, "loss": 0.4182, "rewards/accuracies": 0.875, "rewards/chosen": -0.17162173986434937, "rewards/margins": 1.5634822845458984, "rewards/rejected": -1.7351038455963135, "step": 6904 }, { "epoch": 0.8, "learning_rate": 6.212103476530493e-08, "logits/chosen": -2.6099069118499756, "logits/rejected": -2.510551691055298, "logps/chosen": -172.3911895751953, "logps/rejected": -141.64857482910156, "loss": 0.7413, "rewards/accuracies": 0.5, "rewards/chosen": -0.8225361108779907, "rewards/margins": 0.17527207732200623, "rewards/rejected": -0.9978082180023193, "step": 6905 }, { "epoch": 0.8, "learning_rate": 6.208591829568067e-08, "logits/chosen": -3.2003750801086426, "logits/rejected": -3.176957607269287, "logps/chosen": -231.46014404296875, "logps/rejected": -241.0935516357422, "loss": 0.8482, "rewards/accuracies": 0.625, "rewards/chosen": -0.8179933428764343, "rewards/margins": 0.5122226476669312, "rewards/rejected": -1.3302160501480103, "step": 6906 }, { "epoch": 0.8, "learning_rate": 6.205080182605641e-08, "logits/chosen": -3.539546012878418, "logits/rejected": -3.355071783065796, "logps/chosen": -377.757080078125, "logps/rejected": -294.52069091796875, "loss": 0.2315, "rewards/accuracies": 0.875, "rewards/chosen": -0.13650892674922943, "rewards/margins": 2.0078136920928955, "rewards/rejected": -2.144322395324707, "step": 6907 }, { "epoch": 0.8, "learning_rate": 6.201568535643217e-08, "logits/chosen": -2.9643073081970215, "logits/rejected": -2.795623779296875, "logps/chosen": -159.75967407226562, "logps/rejected": -219.87701416015625, "loss": 0.4528, "rewards/accuracies": 0.625, "rewards/chosen": -0.4945213794708252, "rewards/margins": 1.444279670715332, "rewards/rejected": -1.9388009309768677, "step": 6908 }, { "epoch": 0.8, "learning_rate": 6.19805688868079e-08, "logits/chosen": -2.997471809387207, "logits/rejected": -3.167635440826416, "logps/chosen": -222.56500244140625, "logps/rejected": -184.3179168701172, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": -0.40444353222846985, "rewards/margins": 1.0463972091674805, "rewards/rejected": -1.4508408308029175, "step": 6909 }, { "epoch": 0.8, "learning_rate": 6.194545241718365e-08, "logits/chosen": -2.7558436393737793, "logits/rejected": -2.6311488151550293, "logps/chosen": -243.82489013671875, "logps/rejected": -283.2449951171875, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": 0.040884919464588165, "rewards/margins": 2.5031933784484863, "rewards/rejected": -2.462308883666992, "step": 6910 }, { "epoch": 0.8, "learning_rate": 6.19103359475594e-08, "logits/chosen": -3.2598183155059814, "logits/rejected": -3.002974033355713, "logps/chosen": -762.5930786132812, "logps/rejected": -241.87672424316406, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": 0.26440906524658203, "rewards/margins": 1.8656747341156006, "rewards/rejected": -1.6012656688690186, "step": 6911 }, { "epoch": 0.8, "learning_rate": 6.187521947793514e-08, "logits/chosen": -3.274794578552246, "logits/rejected": -2.8384978771209717, "logps/chosen": -280.97576904296875, "logps/rejected": -170.1022491455078, "loss": 0.3195, "rewards/accuracies": 0.75, "rewards/chosen": -0.31578171253204346, "rewards/margins": 1.8131417036056519, "rewards/rejected": -2.1289234161376953, "step": 6912 }, { "epoch": 0.8, "learning_rate": 6.18401030083109e-08, "logits/chosen": -3.918534278869629, "logits/rejected": -3.837541341781616, "logps/chosen": -230.48626708984375, "logps/rejected": -130.49130249023438, "loss": 0.362, "rewards/accuracies": 0.875, "rewards/chosen": -0.43590372800827026, "rewards/margins": 1.5405082702636719, "rewards/rejected": -1.976412057876587, "step": 6913 }, { "epoch": 0.8, "learning_rate": 6.180498653868664e-08, "logits/chosen": -2.946974277496338, "logits/rejected": -2.5889389514923096, "logps/chosen": -313.0987243652344, "logps/rejected": -469.38592529296875, "loss": 0.4123, "rewards/accuracies": 0.875, "rewards/chosen": -0.38947802782058716, "rewards/margins": 1.4635546207427979, "rewards/rejected": -1.8530325889587402, "step": 6914 }, { "epoch": 0.8, "learning_rate": 6.176987006906239e-08, "logits/chosen": -3.1787405014038086, "logits/rejected": -2.780752182006836, "logps/chosen": -297.644775390625, "logps/rejected": -498.3953857421875, "loss": 0.3652, "rewards/accuracies": 0.875, "rewards/chosen": -0.1990986466407776, "rewards/margins": 1.4441978931427002, "rewards/rejected": -1.643296480178833, "step": 6915 }, { "epoch": 0.8, "learning_rate": 6.173475359943813e-08, "logits/chosen": -2.603199005126953, "logits/rejected": -2.515979528427124, "logps/chosen": -389.23907470703125, "logps/rejected": -172.3328857421875, "loss": 0.2646, "rewards/accuracies": 1.0, "rewards/chosen": 0.32855284214019775, "rewards/margins": 1.4370816946029663, "rewards/rejected": -1.1085288524627686, "step": 6916 }, { "epoch": 0.8, "learning_rate": 6.169963712981388e-08, "logits/chosen": -2.464099407196045, "logits/rejected": -2.589663505554199, "logps/chosen": -338.3741455078125, "logps/rejected": -247.61146545410156, "loss": 0.4167, "rewards/accuracies": 0.75, "rewards/chosen": -0.9013187885284424, "rewards/margins": 1.0838890075683594, "rewards/rejected": -1.9852077960968018, "step": 6917 }, { "epoch": 0.8, "learning_rate": 6.166452066018962e-08, "logits/chosen": -2.22609543800354, "logits/rejected": -2.554272413253784, "logps/chosen": -494.99359130859375, "logps/rejected": -327.25946044921875, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": 1.1254196166992188, "rewards/margins": 1.926288366317749, "rewards/rejected": -0.8008686304092407, "step": 6918 }, { "epoch": 0.8, "learning_rate": 6.162940419056538e-08, "logits/chosen": -2.949626922607422, "logits/rejected": -2.933156967163086, "logps/chosen": -343.3488464355469, "logps/rejected": -282.4388732910156, "loss": 0.2131, "rewards/accuracies": 1.0, "rewards/chosen": 0.34560346603393555, "rewards/margins": 1.7155488729476929, "rewards/rejected": -1.3699454069137573, "step": 6919 }, { "epoch": 0.8, "learning_rate": 6.159428772094112e-08, "logits/chosen": -3.5891637802124023, "logits/rejected": -3.947050094604492, "logps/chosen": -260.6293640136719, "logps/rejected": -343.9156188964844, "loss": 0.6376, "rewards/accuracies": 0.75, "rewards/chosen": -0.533272385597229, "rewards/margins": 0.4418676197528839, "rewards/rejected": -0.9751399755477905, "step": 6920 }, { "epoch": 0.8, "learning_rate": 6.155917125131686e-08, "logits/chosen": -3.2585983276367188, "logits/rejected": -3.1968188285827637, "logps/chosen": -88.00006866455078, "logps/rejected": -175.18128967285156, "loss": 0.3037, "rewards/accuracies": 0.875, "rewards/chosen": -0.21215826272964478, "rewards/margins": 2.4365768432617188, "rewards/rejected": -2.648735284805298, "step": 6921 }, { "epoch": 0.8, "learning_rate": 6.152405478169261e-08, "logits/chosen": -2.611107587814331, "logits/rejected": -2.6224775314331055, "logps/chosen": -433.9884948730469, "logps/rejected": -303.4171142578125, "loss": 0.2259, "rewards/accuracies": 0.875, "rewards/chosen": -0.1996743083000183, "rewards/margins": 2.1115474700927734, "rewards/rejected": -2.3112220764160156, "step": 6922 }, { "epoch": 0.8, "learning_rate": 6.148893831206835e-08, "logits/chosen": -3.101010322570801, "logits/rejected": -2.757798433303833, "logps/chosen": -274.4108581542969, "logps/rejected": -173.5542755126953, "loss": 0.2533, "rewards/accuracies": 1.0, "rewards/chosen": 0.44520407915115356, "rewards/margins": 2.0208957195281982, "rewards/rejected": -1.5756916999816895, "step": 6923 }, { "epoch": 0.8, "learning_rate": 6.14538218424441e-08, "logits/chosen": -3.267371654510498, "logits/rejected": -3.592923641204834, "logps/chosen": -254.95144653320312, "logps/rejected": -304.2880554199219, "loss": 0.4551, "rewards/accuracies": 0.625, "rewards/chosen": -0.27280062437057495, "rewards/margins": 2.593517303466797, "rewards/rejected": -2.8663182258605957, "step": 6924 }, { "epoch": 0.8, "learning_rate": 6.141870537281985e-08, "logits/chosen": -2.834172487258911, "logits/rejected": -2.7362983226776123, "logps/chosen": -182.0997314453125, "logps/rejected": -315.0140075683594, "loss": 0.3146, "rewards/accuracies": 0.875, "rewards/chosen": 0.1976601481437683, "rewards/margins": 2.2280611991882324, "rewards/rejected": -2.0304012298583984, "step": 6925 }, { "epoch": 0.8, "learning_rate": 6.138358890319559e-08, "logits/chosen": -2.9553768634796143, "logits/rejected": -2.9969754219055176, "logps/chosen": -225.42721557617188, "logps/rejected": -342.064208984375, "loss": 0.4131, "rewards/accuracies": 0.75, "rewards/chosen": 0.4038824439048767, "rewards/margins": 2.1553030014038086, "rewards/rejected": -1.7514206171035767, "step": 6926 }, { "epoch": 0.8, "learning_rate": 6.134847243357134e-08, "logits/chosen": -3.8258414268493652, "logits/rejected": -3.7468862533569336, "logps/chosen": -259.67449951171875, "logps/rejected": -223.55364990234375, "loss": 0.6097, "rewards/accuracies": 0.625, "rewards/chosen": -0.7806622982025146, "rewards/margins": 0.4712674021720886, "rewards/rejected": -1.2519296407699585, "step": 6927 }, { "epoch": 0.8, "learning_rate": 6.131335596394708e-08, "logits/chosen": -2.5032310485839844, "logits/rejected": -2.8347997665405273, "logps/chosen": -174.287841796875, "logps/rejected": -136.6529998779297, "loss": 0.6625, "rewards/accuracies": 0.5, "rewards/chosen": -0.333992063999176, "rewards/margins": 1.1687606573104858, "rewards/rejected": -1.502752661705017, "step": 6928 }, { "epoch": 0.8, "learning_rate": 6.127823949432284e-08, "logits/chosen": -2.940782070159912, "logits/rejected": -2.869699001312256, "logps/chosen": -409.40771484375, "logps/rejected": -196.79666137695312, "loss": 0.2217, "rewards/accuracies": 1.0, "rewards/chosen": 0.12029287219047546, "rewards/margins": 2.224592685699463, "rewards/rejected": -2.104300022125244, "step": 6929 }, { "epoch": 0.8, "learning_rate": 6.124312302469858e-08, "logits/chosen": -3.107272148132324, "logits/rejected": -2.8803775310516357, "logps/chosen": -170.6290740966797, "logps/rejected": -226.51034545898438, "loss": 0.4088, "rewards/accuracies": 0.625, "rewards/chosen": 0.20312818884849548, "rewards/margins": 2.205857753753662, "rewards/rejected": -2.002729654312134, "step": 6930 }, { "epoch": 0.8, "learning_rate": 6.120800655507433e-08, "logits/chosen": -3.258471727371216, "logits/rejected": -3.3949851989746094, "logps/chosen": -278.8154296875, "logps/rejected": -296.8592529296875, "loss": 0.3238, "rewards/accuracies": 0.75, "rewards/chosen": 0.4642886519432068, "rewards/margins": 2.1703591346740723, "rewards/rejected": -1.7060701847076416, "step": 6931 }, { "epoch": 0.8, "learning_rate": 6.117289008545007e-08, "logits/chosen": -3.1768593788146973, "logits/rejected": -3.000978946685791, "logps/chosen": -283.0621337890625, "logps/rejected": -188.79864501953125, "loss": 0.3427, "rewards/accuracies": 0.875, "rewards/chosen": 0.14919519424438477, "rewards/margins": 1.2274787425994873, "rewards/rejected": -1.0782835483551025, "step": 6932 }, { "epoch": 0.8, "learning_rate": 6.113777361582583e-08, "logits/chosen": -2.4183101654052734, "logits/rejected": -2.380706787109375, "logps/chosen": -356.02069091796875, "logps/rejected": -338.8557434082031, "loss": 0.3731, "rewards/accuracies": 0.75, "rewards/chosen": -0.05462533235549927, "rewards/margins": 1.1229419708251953, "rewards/rejected": -1.1775673627853394, "step": 6933 }, { "epoch": 0.8, "learning_rate": 6.110265714620157e-08, "logits/chosen": -3.0102524757385254, "logits/rejected": -2.858203172683716, "logps/chosen": -232.3582763671875, "logps/rejected": -268.3809509277344, "loss": 0.5292, "rewards/accuracies": 0.625, "rewards/chosen": -0.356096476316452, "rewards/margins": 0.9975271224975586, "rewards/rejected": -1.353623628616333, "step": 6934 }, { "epoch": 0.8, "learning_rate": 6.106754067657731e-08, "logits/chosen": -3.631277084350586, "logits/rejected": -3.4281816482543945, "logps/chosen": -377.9190368652344, "logps/rejected": -270.5397644042969, "loss": 0.523, "rewards/accuracies": 0.625, "rewards/chosen": -0.14649976789951324, "rewards/margins": 1.7951287031173706, "rewards/rejected": -1.9416284561157227, "step": 6935 }, { "epoch": 0.8, "learning_rate": 6.103242420695306e-08, "logits/chosen": -2.726210594177246, "logits/rejected": -2.5786521434783936, "logps/chosen": -360.8613586425781, "logps/rejected": -198.5255126953125, "loss": 0.5184, "rewards/accuracies": 0.75, "rewards/chosen": -0.44467517733573914, "rewards/margins": 1.5484180450439453, "rewards/rejected": -1.993093490600586, "step": 6936 }, { "epoch": 0.8, "learning_rate": 6.09973077373288e-08, "logits/chosen": -3.5762124061584473, "logits/rejected": -3.3063290119171143, "logps/chosen": -138.4512939453125, "logps/rejected": -213.74795532226562, "loss": 0.3364, "rewards/accuracies": 1.0, "rewards/chosen": -0.4511685371398926, "rewards/margins": 1.6646990776062012, "rewards/rejected": -2.1158676147460938, "step": 6937 }, { "epoch": 0.8, "learning_rate": 6.096219126770454e-08, "logits/chosen": -3.0415868759155273, "logits/rejected": -3.5145955085754395, "logps/chosen": -169.2500457763672, "logps/rejected": -188.27256774902344, "loss": 0.3711, "rewards/accuracies": 0.875, "rewards/chosen": -0.2367323935031891, "rewards/margins": 1.586731195449829, "rewards/rejected": -1.8234636783599854, "step": 6938 }, { "epoch": 0.8, "learning_rate": 6.09270747980803e-08, "logits/chosen": -3.4779908657073975, "logits/rejected": -3.5100722312927246, "logps/chosen": -212.20701599121094, "logps/rejected": -202.8245849609375, "loss": 0.2601, "rewards/accuracies": 1.0, "rewards/chosen": 0.007272630929946899, "rewards/margins": 1.5324013233184814, "rewards/rejected": -1.5251288414001465, "step": 6939 }, { "epoch": 0.8, "learning_rate": 6.089195832845604e-08, "logits/chosen": -3.240521192550659, "logits/rejected": -3.134890079498291, "logps/chosen": -263.38372802734375, "logps/rejected": -176.42288208007812, "loss": 0.4734, "rewards/accuracies": 0.625, "rewards/chosen": 0.20650431513786316, "rewards/margins": 0.7102005481719971, "rewards/rejected": -0.5036962032318115, "step": 6940 }, { "epoch": 0.8, "learning_rate": 6.085684185883179e-08, "logits/chosen": -2.7310709953308105, "logits/rejected": -2.876204013824463, "logps/chosen": -345.68121337890625, "logps/rejected": -270.18353271484375, "loss": 1.2103, "rewards/accuracies": 0.5, "rewards/chosen": -0.5730695128440857, "rewards/margins": -0.058474019169807434, "rewards/rejected": -0.5145954489707947, "step": 6941 }, { "epoch": 0.8, "learning_rate": 6.082172538920753e-08, "logits/chosen": -3.2182700634002686, "logits/rejected": -3.009902000427246, "logps/chosen": -184.67543029785156, "logps/rejected": -218.2108917236328, "loss": 0.3302, "rewards/accuracies": 0.875, "rewards/chosen": -0.49612462520599365, "rewards/margins": 1.9733840227127075, "rewards/rejected": -2.469508647918701, "step": 6942 }, { "epoch": 0.8, "learning_rate": 6.078660891958329e-08, "logits/chosen": -2.7201318740844727, "logits/rejected": -2.8363242149353027, "logps/chosen": -177.3883514404297, "logps/rejected": -335.1317138671875, "loss": 0.3641, "rewards/accuracies": 0.875, "rewards/chosen": 0.01637636125087738, "rewards/margins": 1.9553626775741577, "rewards/rejected": -1.9389863014221191, "step": 6943 }, { "epoch": 0.8, "learning_rate": 6.075149244995903e-08, "logits/chosen": -3.4872140884399414, "logits/rejected": -3.4439072608947754, "logps/chosen": -108.7239990234375, "logps/rejected": -104.06349182128906, "loss": 0.4399, "rewards/accuracies": 0.875, "rewards/chosen": 0.31020018458366394, "rewards/margins": 1.2907323837280273, "rewards/rejected": -0.9805324077606201, "step": 6944 }, { "epoch": 0.8, "learning_rate": 6.071637598033478e-08, "logits/chosen": -3.571685791015625, "logits/rejected": -3.5475401878356934, "logps/chosen": -291.85186767578125, "logps/rejected": -167.1832733154297, "loss": 0.4401, "rewards/accuracies": 0.625, "rewards/chosen": -0.11681388318538666, "rewards/margins": 1.0881768465042114, "rewards/rejected": -1.2049907445907593, "step": 6945 }, { "epoch": 0.8, "learning_rate": 6.068125951071052e-08, "logits/chosen": -3.552715301513672, "logits/rejected": -3.6484150886535645, "logps/chosen": -149.06678771972656, "logps/rejected": -261.9591064453125, "loss": 0.2519, "rewards/accuracies": 1.0, "rewards/chosen": 0.11034746468067169, "rewards/margins": 2.1226608753204346, "rewards/rejected": -2.0123133659362793, "step": 6946 }, { "epoch": 0.8, "learning_rate": 6.064614304108627e-08, "logits/chosen": -3.2166638374328613, "logits/rejected": -2.947073459625244, "logps/chosen": -408.6716613769531, "logps/rejected": -231.74752807617188, "loss": 0.4649, "rewards/accuracies": 0.75, "rewards/chosen": -0.1596297025680542, "rewards/margins": 1.518135666847229, "rewards/rejected": -1.6777653694152832, "step": 6947 }, { "epoch": 0.8, "learning_rate": 6.061102657146202e-08, "logits/chosen": -3.2877695560455322, "logits/rejected": -3.192363739013672, "logps/chosen": -306.6596374511719, "logps/rejected": -312.8467102050781, "loss": 0.313, "rewards/accuracies": 1.0, "rewards/chosen": -0.31004273891448975, "rewards/margins": 1.2941596508026123, "rewards/rejected": -1.604202389717102, "step": 6948 }, { "epoch": 0.8, "learning_rate": 6.057591010183777e-08, "logits/chosen": -3.3210089206695557, "logits/rejected": -3.657902240753174, "logps/chosen": -322.0251159667969, "logps/rejected": -265.26373291015625, "loss": 0.5508, "rewards/accuracies": 0.75, "rewards/chosen": -0.4957643747329712, "rewards/margins": 1.5602669715881348, "rewards/rejected": -2.0560312271118164, "step": 6949 }, { "epoch": 0.8, "learning_rate": 6.054079363221351e-08, "logits/chosen": -2.9940710067749023, "logits/rejected": -2.818845510482788, "logps/chosen": -281.94183349609375, "logps/rejected": -253.54766845703125, "loss": 0.4584, "rewards/accuracies": 0.75, "rewards/chosen": 0.10900469124317169, "rewards/margins": 1.3168742656707764, "rewards/rejected": -1.207869529724121, "step": 6950 }, { "epoch": 0.8, "learning_rate": 6.050567716258925e-08, "logits/chosen": -2.786530017852783, "logits/rejected": -2.8202271461486816, "logps/chosen": -373.8701477050781, "logps/rejected": -268.00238037109375, "loss": 0.4243, "rewards/accuracies": 0.75, "rewards/chosen": 0.431793212890625, "rewards/margins": 0.9536972045898438, "rewards/rejected": -0.5219039916992188, "step": 6951 }, { "epoch": 0.8, "learning_rate": 6.047056069296499e-08, "logits/chosen": -3.3679487705230713, "logits/rejected": -3.334458351135254, "logps/chosen": -307.3662109375, "logps/rejected": -378.37200927734375, "loss": 0.2732, "rewards/accuracies": 0.75, "rewards/chosen": 0.3789092004299164, "rewards/margins": 3.028428316116333, "rewards/rejected": -2.6495189666748047, "step": 6952 }, { "epoch": 0.8, "learning_rate": 6.043544422334074e-08, "logits/chosen": -3.672128200531006, "logits/rejected": -3.238973379135132, "logps/chosen": -233.14779663085938, "logps/rejected": -182.70388793945312, "loss": 0.4148, "rewards/accuracies": 0.75, "rewards/chosen": -0.42179447412490845, "rewards/margins": 1.2678064107894897, "rewards/rejected": -1.689600944519043, "step": 6953 }, { "epoch": 0.8, "learning_rate": 6.040032775371649e-08, "logits/chosen": -3.3454489707946777, "logits/rejected": -3.46112060546875, "logps/chosen": -348.00030517578125, "logps/rejected": -336.1090087890625, "loss": 0.4976, "rewards/accuracies": 0.625, "rewards/chosen": -0.7184159159660339, "rewards/margins": 1.3401222229003906, "rewards/rejected": -2.0585379600524902, "step": 6954 }, { "epoch": 0.8, "learning_rate": 6.036521128409224e-08, "logits/chosen": -3.1476855278015137, "logits/rejected": -3.1050431728363037, "logps/chosen": -258.1007080078125, "logps/rejected": -290.1959228515625, "loss": 0.2843, "rewards/accuracies": 0.875, "rewards/chosen": -0.1695144772529602, "rewards/margins": 2.185655355453491, "rewards/rejected": -2.3551697731018066, "step": 6955 }, { "epoch": 0.8, "learning_rate": 6.033009481446798e-08, "logits/chosen": -2.3613100051879883, "logits/rejected": -2.4268555641174316, "logps/chosen": -299.7089538574219, "logps/rejected": -276.5127258300781, "loss": 0.2545, "rewards/accuracies": 0.875, "rewards/chosen": 0.6070857644081116, "rewards/margins": 2.0665175914764404, "rewards/rejected": -1.4594316482543945, "step": 6956 }, { "epoch": 0.8, "learning_rate": 6.029497834484372e-08, "logits/chosen": -3.551363945007324, "logits/rejected": -3.451401710510254, "logps/chosen": -248.30703735351562, "logps/rejected": -206.882080078125, "loss": 0.4627, "rewards/accuracies": 0.75, "rewards/chosen": -0.09178006649017334, "rewards/margins": 1.6204028129577637, "rewards/rejected": -1.712182641029358, "step": 6957 }, { "epoch": 0.8, "learning_rate": 6.025986187521947e-08, "logits/chosen": -3.796799659729004, "logits/rejected": -3.544156551361084, "logps/chosen": -254.96571350097656, "logps/rejected": -222.07470703125, "loss": 0.4221, "rewards/accuracies": 0.75, "rewards/chosen": -0.1552901417016983, "rewards/margins": 1.7896525859832764, "rewards/rejected": -1.944942831993103, "step": 6958 }, { "epoch": 0.8, "learning_rate": 6.022474540559521e-08, "logits/chosen": -3.125786066055298, "logits/rejected": -3.0213370323181152, "logps/chosen": -197.5426025390625, "logps/rejected": -224.67538452148438, "loss": 0.2828, "rewards/accuracies": 0.875, "rewards/chosen": 0.021479979157447815, "rewards/margins": 2.7360010147094727, "rewards/rejected": -2.7145206928253174, "step": 6959 }, { "epoch": 0.8, "learning_rate": 6.018962893597097e-08, "logits/chosen": -4.134567737579346, "logits/rejected": -4.047092437744141, "logps/chosen": -172.82894897460938, "logps/rejected": -163.48060607910156, "loss": 0.735, "rewards/accuracies": 0.375, "rewards/chosen": -0.3580150008201599, "rewards/margins": 0.7979310154914856, "rewards/rejected": -1.1559460163116455, "step": 6960 }, { "epoch": 0.8, "learning_rate": 6.015451246634671e-08, "logits/chosen": -3.091373920440674, "logits/rejected": -2.932126760482788, "logps/chosen": -392.9687805175781, "logps/rejected": -211.02505493164062, "loss": 0.3932, "rewards/accuracies": 0.75, "rewards/chosen": 0.034972578287124634, "rewards/margins": 1.2965271472930908, "rewards/rejected": -1.261554479598999, "step": 6961 }, { "epoch": 0.8, "learning_rate": 6.011939599672246e-08, "logits/chosen": -2.552109718322754, "logits/rejected": -2.490813732147217, "logps/chosen": -400.44610595703125, "logps/rejected": -294.38690185546875, "loss": 0.4477, "rewards/accuracies": 0.75, "rewards/chosen": 0.6155077219009399, "rewards/margins": 1.6363301277160645, "rewards/rejected": -1.020822525024414, "step": 6962 }, { "epoch": 0.8, "learning_rate": 6.00842795270982e-08, "logits/chosen": -3.0347847938537598, "logits/rejected": -2.9451870918273926, "logps/chosen": -239.75819396972656, "logps/rejected": -246.9307098388672, "loss": 0.3737, "rewards/accuracies": 0.875, "rewards/chosen": 0.3552280068397522, "rewards/margins": 2.036203384399414, "rewards/rejected": -1.680975317955017, "step": 6963 }, { "epoch": 0.8, "learning_rate": 6.004916305747396e-08, "logits/chosen": -3.2504942417144775, "logits/rejected": -3.2064993381500244, "logps/chosen": -162.08993530273438, "logps/rejected": -220.43035888671875, "loss": 0.5576, "rewards/accuracies": 0.75, "rewards/chosen": -0.5907617211341858, "rewards/margins": 0.9056981205940247, "rewards/rejected": -1.4964598417282104, "step": 6964 }, { "epoch": 0.8, "learning_rate": 6.00140465878497e-08, "logits/chosen": -2.9582526683807373, "logits/rejected": -2.9211533069610596, "logps/chosen": -326.4638366699219, "logps/rejected": -235.81405639648438, "loss": 0.2787, "rewards/accuracies": 0.875, "rewards/chosen": 0.18441319465637207, "rewards/margins": 2.012279510498047, "rewards/rejected": -1.8278663158416748, "step": 6965 }, { "epoch": 0.8, "learning_rate": 5.997893011822545e-08, "logits/chosen": -2.8860208988189697, "logits/rejected": -2.95339298248291, "logps/chosen": -553.3525390625, "logps/rejected": -364.57867431640625, "loss": 0.2466, "rewards/accuracies": 1.0, "rewards/chosen": 0.3985903263092041, "rewards/margins": 1.9060683250427246, "rewards/rejected": -1.5074779987335205, "step": 6966 }, { "epoch": 0.8, "learning_rate": 5.994381364860119e-08, "logits/chosen": -3.1683881282806396, "logits/rejected": -3.268204689025879, "logps/chosen": -209.6517333984375, "logps/rejected": -103.41073608398438, "loss": 0.5608, "rewards/accuracies": 0.625, "rewards/chosen": 0.02602100372314453, "rewards/margins": 0.48678308725357056, "rewards/rejected": -0.460762083530426, "step": 6967 }, { "epoch": 0.8, "learning_rate": 5.990869717897693e-08, "logits/chosen": -2.0475707054138184, "logits/rejected": -1.8462293148040771, "logps/chosen": -174.15228271484375, "logps/rejected": -299.77227783203125, "loss": 0.3458, "rewards/accuracies": 0.875, "rewards/chosen": 0.09349679201841354, "rewards/margins": 1.8732316493988037, "rewards/rejected": -1.7797349691390991, "step": 6968 }, { "epoch": 0.8, "learning_rate": 5.987358070935267e-08, "logits/chosen": -2.874101161956787, "logits/rejected": -3.023653745651245, "logps/chosen": -248.56207275390625, "logps/rejected": -206.61537170410156, "loss": 0.2285, "rewards/accuracies": 1.0, "rewards/chosen": 0.7519725561141968, "rewards/margins": 1.5925652980804443, "rewards/rejected": -0.840592622756958, "step": 6969 }, { "epoch": 0.8, "learning_rate": 5.983846423972843e-08, "logits/chosen": -2.492197036743164, "logits/rejected": -2.4313254356384277, "logps/chosen": -324.7337646484375, "logps/rejected": -346.2574157714844, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -0.1870432198047638, "rewards/margins": 1.3263425827026367, "rewards/rejected": -1.5133857727050781, "step": 6970 }, { "epoch": 0.8, "learning_rate": 5.980334777010417e-08, "logits/chosen": -3.0768752098083496, "logits/rejected": -2.810382843017578, "logps/chosen": -249.8797607421875, "logps/rejected": -206.31436157226562, "loss": 0.7506, "rewards/accuracies": 0.5, "rewards/chosen": -0.3719184100627899, "rewards/margins": 0.4831152856349945, "rewards/rejected": -0.8550336956977844, "step": 6971 }, { "epoch": 0.8, "learning_rate": 5.976823130047992e-08, "logits/chosen": -3.348515510559082, "logits/rejected": -3.0292627811431885, "logps/chosen": -175.6802520751953, "logps/rejected": -144.98263549804688, "loss": 0.3266, "rewards/accuracies": 0.875, "rewards/chosen": -0.24199409782886505, "rewards/margins": 1.965043544769287, "rewards/rejected": -2.2070374488830566, "step": 6972 }, { "epoch": 0.8, "learning_rate": 5.973311483085566e-08, "logits/chosen": -3.467180013656616, "logits/rejected": -3.434614658355713, "logps/chosen": -294.4759216308594, "logps/rejected": -309.9489440917969, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": 0.5537824034690857, "rewards/margins": 3.0176942348480225, "rewards/rejected": -2.463911771774292, "step": 6973 }, { "epoch": 0.8, "learning_rate": 5.969799836123142e-08, "logits/chosen": -3.4978654384613037, "logits/rejected": -3.0073935985565186, "logps/chosen": -456.33123779296875, "logps/rejected": -285.3265686035156, "loss": 0.4403, "rewards/accuracies": 0.875, "rewards/chosen": 0.08993859589099884, "rewards/margins": 1.3635438680648804, "rewards/rejected": -1.2736053466796875, "step": 6974 }, { "epoch": 0.8, "learning_rate": 5.966288189160716e-08, "logits/chosen": -3.3789520263671875, "logits/rejected": -3.3220598697662354, "logps/chosen": -181.70730590820312, "logps/rejected": -154.29824829101562, "loss": 0.7178, "rewards/accuracies": 0.5, "rewards/chosen": -0.47566860914230347, "rewards/margins": 1.03249192237854, "rewards/rejected": -1.5081605911254883, "step": 6975 }, { "epoch": 0.8, "learning_rate": 5.962776542198291e-08, "logits/chosen": -3.2670490741729736, "logits/rejected": -3.387162685394287, "logps/chosen": -158.5674285888672, "logps/rejected": -260.0092468261719, "loss": 0.2315, "rewards/accuracies": 0.875, "rewards/chosen": -0.09496927261352539, "rewards/margins": 2.3262791633605957, "rewards/rejected": -2.421248197555542, "step": 6976 }, { "epoch": 0.8, "learning_rate": 5.959264895235865e-08, "logits/chosen": -3.5319101810455322, "logits/rejected": -3.590482711791992, "logps/chosen": -366.9053039550781, "logps/rejected": -351.73193359375, "loss": 0.2228, "rewards/accuracies": 0.875, "rewards/chosen": 0.9525577425956726, "rewards/margins": 2.5397140979766846, "rewards/rejected": -1.5871565341949463, "step": 6977 }, { "epoch": 0.8, "learning_rate": 5.95575324827344e-08, "logits/chosen": -3.3996987342834473, "logits/rejected": -3.0378060340881348, "logps/chosen": -319.0865173339844, "logps/rejected": -176.8582763671875, "loss": 0.393, "rewards/accuracies": 0.875, "rewards/chosen": 0.16276530921459198, "rewards/margins": 1.1855764389038086, "rewards/rejected": -1.0228111743927002, "step": 6978 }, { "epoch": 0.8, "learning_rate": 5.9522416013110146e-08, "logits/chosen": -3.3175482749938965, "logits/rejected": -3.3471503257751465, "logps/chosen": -164.73910522460938, "logps/rejected": -241.93838500976562, "loss": 0.1989, "rewards/accuracies": 1.0, "rewards/chosen": 0.09648928791284561, "rewards/margins": 3.6138267517089844, "rewards/rejected": -3.5173375606536865, "step": 6979 }, { "epoch": 0.8, "learning_rate": 5.9487299543485894e-08, "logits/chosen": -2.9146969318389893, "logits/rejected": -2.7408101558685303, "logps/chosen": -159.3497314453125, "logps/rejected": -190.52230834960938, "loss": 0.4084, "rewards/accuracies": 0.75, "rewards/chosen": -0.1908813714981079, "rewards/margins": 1.473282814025879, "rewards/rejected": -1.6641643047332764, "step": 6980 }, { "epoch": 0.8, "learning_rate": 5.9452183073861634e-08, "logits/chosen": -3.195327043533325, "logits/rejected": -2.9846999645233154, "logps/chosen": -499.8396911621094, "logps/rejected": -320.21441650390625, "loss": 0.3131, "rewards/accuracies": 0.875, "rewards/chosen": -0.12372083961963654, "rewards/margins": 1.925856351852417, "rewards/rejected": -2.049577236175537, "step": 6981 }, { "epoch": 0.8, "learning_rate": 5.941706660423739e-08, "logits/chosen": -3.4754958152770996, "logits/rejected": -3.6751952171325684, "logps/chosen": -63.35642623901367, "logps/rejected": -156.18824768066406, "loss": 0.3988, "rewards/accuracies": 0.75, "rewards/chosen": 0.38382238149642944, "rewards/margins": 1.3138604164123535, "rewards/rejected": -0.9300379753112793, "step": 6982 }, { "epoch": 0.81, "learning_rate": 5.938195013461313e-08, "logits/chosen": -3.7706804275512695, "logits/rejected": -3.7852325439453125, "logps/chosen": -158.6859130859375, "logps/rejected": -240.93341064453125, "loss": 0.5957, "rewards/accuracies": 0.625, "rewards/chosen": -0.6287675499916077, "rewards/margins": 2.1492199897766113, "rewards/rejected": -2.777987480163574, "step": 6983 }, { "epoch": 0.81, "learning_rate": 5.934683366498888e-08, "logits/chosen": -3.154961585998535, "logits/rejected": -3.1029090881347656, "logps/chosen": -470.1600646972656, "logps/rejected": -329.5315856933594, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": 0.5353565812110901, "rewards/margins": 1.5144104957580566, "rewards/rejected": -0.9790538549423218, "step": 6984 }, { "epoch": 0.81, "learning_rate": 5.931171719536462e-08, "logits/chosen": -2.795490026473999, "logits/rejected": -2.897111415863037, "logps/chosen": -425.0279541015625, "logps/rejected": -292.76336669921875, "loss": 0.4729, "rewards/accuracies": 0.75, "rewards/chosen": 0.037370458245277405, "rewards/margins": 1.4702348709106445, "rewards/rejected": -1.4328645467758179, "step": 6985 }, { "epoch": 0.81, "learning_rate": 5.927660072574037e-08, "logits/chosen": -2.7410759925842285, "logits/rejected": -2.5345304012298584, "logps/chosen": -363.58935546875, "logps/rejected": -357.5035400390625, "loss": 0.2231, "rewards/accuracies": 0.875, "rewards/chosen": -0.16033661365509033, "rewards/margins": 2.76309871673584, "rewards/rejected": -2.9234352111816406, "step": 6986 }, { "epoch": 0.81, "learning_rate": 5.924148425611611e-08, "logits/chosen": -1.930738925933838, "logits/rejected": -1.9003853797912598, "logps/chosen": -497.39703369140625, "logps/rejected": -380.62359619140625, "loss": 0.4549, "rewards/accuracies": 0.75, "rewards/chosen": -0.4081801772117615, "rewards/margins": 1.057241678237915, "rewards/rejected": -1.4654216766357422, "step": 6987 }, { "epoch": 0.81, "learning_rate": 5.9206367786491865e-08, "logits/chosen": -2.314234733581543, "logits/rejected": -2.4437315464019775, "logps/chosen": -380.39361572265625, "logps/rejected": -383.2861328125, "loss": 0.387, "rewards/accuracies": 0.875, "rewards/chosen": 0.33161747455596924, "rewards/margins": 3.4700002670288086, "rewards/rejected": -3.1383824348449707, "step": 6988 }, { "epoch": 0.81, "learning_rate": 5.9171251316867606e-08, "logits/chosen": -2.6744751930236816, "logits/rejected": -2.9168758392333984, "logps/chosen": -240.18701171875, "logps/rejected": -384.57073974609375, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": -0.14186100661754608, "rewards/margins": 1.9036368131637573, "rewards/rejected": -2.0454978942871094, "step": 6989 }, { "epoch": 0.81, "learning_rate": 5.913613484724336e-08, "logits/chosen": -2.638814687728882, "logits/rejected": -2.533006429672241, "logps/chosen": -143.0211639404297, "logps/rejected": -138.11163330078125, "loss": 0.3834, "rewards/accuracies": 0.875, "rewards/chosen": -0.4954966604709625, "rewards/margins": 1.3878452777862549, "rewards/rejected": -1.883341908454895, "step": 6990 }, { "epoch": 0.81, "learning_rate": 5.91010183776191e-08, "logits/chosen": -2.7294762134552, "logits/rejected": -2.838654041290283, "logps/chosen": -145.97850036621094, "logps/rejected": -157.5864715576172, "loss": 0.7478, "rewards/accuracies": 0.5, "rewards/chosen": -0.0701386108994484, "rewards/margins": 0.5330374240875244, "rewards/rejected": -0.6031760573387146, "step": 6991 }, { "epoch": 0.81, "learning_rate": 5.906590190799485e-08, "logits/chosen": -2.2638461589813232, "logits/rejected": -2.4907429218292236, "logps/chosen": -342.3465576171875, "logps/rejected": -342.537353515625, "loss": 0.4364, "rewards/accuracies": 0.75, "rewards/chosen": 0.5741224884986877, "rewards/margins": 1.1225779056549072, "rewards/rejected": -0.5484555959701538, "step": 6992 }, { "epoch": 0.81, "learning_rate": 5.9030785438370595e-08, "logits/chosen": -3.102147102355957, "logits/rejected": -3.0724592208862305, "logps/chosen": -235.44862365722656, "logps/rejected": -198.119384765625, "loss": 0.5575, "rewards/accuracies": 0.75, "rewards/chosen": 0.5492009520530701, "rewards/margins": 1.7081482410430908, "rewards/rejected": -1.1589473485946655, "step": 6993 }, { "epoch": 0.81, "learning_rate": 5.899566896874634e-08, "logits/chosen": -4.08859920501709, "logits/rejected": -3.5882415771484375, "logps/chosen": -229.02944946289062, "logps/rejected": -188.18609619140625, "loss": 0.392, "rewards/accuracies": 0.875, "rewards/chosen": 0.07418704032897949, "rewards/margins": 1.1349377632141113, "rewards/rejected": -1.0607508420944214, "step": 6994 }, { "epoch": 0.81, "learning_rate": 5.896055249912208e-08, "logits/chosen": -2.73305606842041, "logits/rejected": -3.0745415687561035, "logps/chosen": -208.04461669921875, "logps/rejected": -162.91372680664062, "loss": 0.4648, "rewards/accuracies": 0.75, "rewards/chosen": -0.25808724761009216, "rewards/margins": 1.4644243717193604, "rewards/rejected": -1.7225115299224854, "step": 6995 }, { "epoch": 0.81, "learning_rate": 5.8925436029497836e-08, "logits/chosen": -2.9460697174072266, "logits/rejected": -2.4610087871551514, "logps/chosen": -257.615478515625, "logps/rejected": -197.93988037109375, "loss": 0.476, "rewards/accuracies": 0.875, "rewards/chosen": 0.07551130652427673, "rewards/margins": 0.999491810798645, "rewards/rejected": -0.9239804744720459, "step": 6996 }, { "epoch": 0.81, "learning_rate": 5.889031955987358e-08, "logits/chosen": -2.903322219848633, "logits/rejected": -2.9148881435394287, "logps/chosen": -268.1269226074219, "logps/rejected": -171.7779998779297, "loss": 0.4742, "rewards/accuracies": 0.75, "rewards/chosen": -0.43259888887405396, "rewards/margins": 2.082779884338379, "rewards/rejected": -2.515378713607788, "step": 6997 }, { "epoch": 0.81, "learning_rate": 5.885520309024933e-08, "logits/chosen": -3.9176628589630127, "logits/rejected": -3.814957857131958, "logps/chosen": -335.3868713378906, "logps/rejected": -344.2461242675781, "loss": 0.3694, "rewards/accuracies": 0.75, "rewards/chosen": -0.09627486765384674, "rewards/margins": 2.1334798336029053, "rewards/rejected": -2.229754686355591, "step": 6998 }, { "epoch": 0.81, "learning_rate": 5.882008662062507e-08, "logits/chosen": -3.500988006591797, "logits/rejected": -3.604020118713379, "logps/chosen": -174.91546630859375, "logps/rejected": -249.53306579589844, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": 0.058472298085689545, "rewards/margins": 2.4385275840759277, "rewards/rejected": -2.3800554275512695, "step": 6999 }, { "epoch": 0.81, "learning_rate": 5.878497015100082e-08, "logits/chosen": -2.493839979171753, "logits/rejected": -2.7347664833068848, "logps/chosen": -266.0604553222656, "logps/rejected": -237.8282470703125, "loss": 0.2504, "rewards/accuracies": 1.0, "rewards/chosen": 0.1719624102115631, "rewards/margins": 2.023911952972412, "rewards/rejected": -1.851949691772461, "step": 7000 }, { "epoch": 0.81, "eval_logits/chosen": -2.8362886905670166, "eval_logits/rejected": -2.7954912185668945, "eval_logps/chosen": -293.8367614746094, "eval_logps/rejected": -237.78546142578125, "eval_loss": 0.42330804467201233, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.02181965485215187, "eval_rewards/margins": 1.3661056756973267, "eval_rewards/rejected": -1.3442859649658203, "eval_runtime": 32.6673, "eval_samples_per_second": 2.143, "eval_steps_per_second": 1.071, "step": 7000 }, { "epoch": 0.81, "learning_rate": 5.8749853681376566e-08, "logits/chosen": -2.9051244258880615, "logits/rejected": -2.9484338760375977, "logps/chosen": -168.61651611328125, "logps/rejected": -201.84576416015625, "loss": 0.4599, "rewards/accuracies": 0.75, "rewards/chosen": -0.6740067005157471, "rewards/margins": 1.0631098747253418, "rewards/rejected": -1.7371165752410889, "step": 7001 }, { "epoch": 0.81, "learning_rate": 5.871473721175231e-08, "logits/chosen": -3.4423139095306396, "logits/rejected": -2.925476312637329, "logps/chosen": -505.7796325683594, "logps/rejected": -342.3538818359375, "loss": 0.1506, "rewards/accuracies": 1.0, "rewards/chosen": 0.799534261226654, "rewards/margins": 2.4757723808288574, "rewards/rejected": -1.6762381792068481, "step": 7002 }, { "epoch": 0.81, "learning_rate": 5.8679620742128054e-08, "logits/chosen": -3.111323118209839, "logits/rejected": -3.1676225662231445, "logps/chosen": -138.64088439941406, "logps/rejected": -182.73558044433594, "loss": 0.4436, "rewards/accuracies": 0.75, "rewards/chosen": -0.05641554296016693, "rewards/margins": 1.0240941047668457, "rewards/rejected": -1.0805097818374634, "step": 7003 }, { "epoch": 0.81, "learning_rate": 5.8644504272503794e-08, "logits/chosen": -2.593442916870117, "logits/rejected": -2.532731771469116, "logps/chosen": -168.3059539794922, "logps/rejected": -186.24221801757812, "loss": 0.5824, "rewards/accuracies": 0.5, "rewards/chosen": 0.08572201430797577, "rewards/margins": 0.7681580781936646, "rewards/rejected": -0.6824361085891724, "step": 7004 }, { "epoch": 0.81, "learning_rate": 5.860938780287955e-08, "logits/chosen": -2.6649162769317627, "logits/rejected": -2.6663284301757812, "logps/chosen": -203.22933959960938, "logps/rejected": -178.6185760498047, "loss": 0.33, "rewards/accuracies": 1.0, "rewards/chosen": -0.24206435680389404, "rewards/margins": 1.4337414503097534, "rewards/rejected": -1.6758058071136475, "step": 7005 }, { "epoch": 0.81, "learning_rate": 5.857427133325529e-08, "logits/chosen": -3.45418119430542, "logits/rejected": -3.2838146686553955, "logps/chosen": -293.0616455078125, "logps/rejected": -248.76087951660156, "loss": 0.3189, "rewards/accuracies": 0.875, "rewards/chosen": 0.13096627593040466, "rewards/margins": 1.6464900970458984, "rewards/rejected": -1.515523910522461, "step": 7006 }, { "epoch": 0.81, "learning_rate": 5.853915486363104e-08, "logits/chosen": -2.945352554321289, "logits/rejected": -2.8514316082000732, "logps/chosen": -276.66265869140625, "logps/rejected": -320.00323486328125, "loss": 1.034, "rewards/accuracies": 0.625, "rewards/chosen": -1.0379970073699951, "rewards/margins": 1.109207034111023, "rewards/rejected": -2.1472041606903076, "step": 7007 }, { "epoch": 0.81, "learning_rate": 5.850403839400678e-08, "logits/chosen": -2.81357479095459, "logits/rejected": -2.8694841861724854, "logps/chosen": -245.12779235839844, "logps/rejected": -246.33140563964844, "loss": 0.4074, "rewards/accuracies": 0.625, "rewards/chosen": 0.7012097239494324, "rewards/margins": 2.5055363178253174, "rewards/rejected": -1.8043264150619507, "step": 7008 }, { "epoch": 0.81, "learning_rate": 5.846892192438253e-08, "logits/chosen": -3.196967601776123, "logits/rejected": -3.756488561630249, "logps/chosen": -142.46762084960938, "logps/rejected": -294.9142761230469, "loss": 0.2482, "rewards/accuracies": 0.75, "rewards/chosen": 0.13832253217697144, "rewards/margins": 3.6251261234283447, "rewards/rejected": -3.4868035316467285, "step": 7009 }, { "epoch": 0.81, "learning_rate": 5.843380545475828e-08, "logits/chosen": -3.41225004196167, "logits/rejected": -3.477538585662842, "logps/chosen": -94.22686004638672, "logps/rejected": -141.69674682617188, "loss": 0.3236, "rewards/accuracies": 1.0, "rewards/chosen": 0.3070080280303955, "rewards/margins": 1.2230339050292969, "rewards/rejected": -0.9160258769989014, "step": 7010 }, { "epoch": 0.81, "learning_rate": 5.8398688985134025e-08, "logits/chosen": -2.688107490539551, "logits/rejected": -2.619704246520996, "logps/chosen": -288.6092529296875, "logps/rejected": -277.4413757324219, "loss": 0.4864, "rewards/accuracies": 0.75, "rewards/chosen": -0.23326177895069122, "rewards/margins": 0.8740946054458618, "rewards/rejected": -1.107356309890747, "step": 7011 }, { "epoch": 0.81, "learning_rate": 5.8363572515509766e-08, "logits/chosen": -2.9327104091644287, "logits/rejected": -2.9905567169189453, "logps/chosen": -394.778076171875, "logps/rejected": -358.7056884765625, "loss": 0.5225, "rewards/accuracies": 0.875, "rewards/chosen": -0.00019050762057304382, "rewards/margins": 0.944491446018219, "rewards/rejected": -0.9446818828582764, "step": 7012 }, { "epoch": 0.81, "learning_rate": 5.832845604588552e-08, "logits/chosen": -2.5158278942108154, "logits/rejected": -2.6115946769714355, "logps/chosen": -248.7677001953125, "logps/rejected": -173.294189453125, "loss": 0.534, "rewards/accuracies": 0.5, "rewards/chosen": 0.03588390350341797, "rewards/margins": 1.1677978038787842, "rewards/rejected": -1.1319141387939453, "step": 7013 }, { "epoch": 0.81, "learning_rate": 5.829333957626126e-08, "logits/chosen": -3.4239258766174316, "logits/rejected": -3.1715087890625, "logps/chosen": -211.8265838623047, "logps/rejected": -357.37579345703125, "loss": 0.2278, "rewards/accuracies": 0.875, "rewards/chosen": 0.26790139079093933, "rewards/margins": 3.259317398071289, "rewards/rejected": -2.9914159774780273, "step": 7014 }, { "epoch": 0.81, "learning_rate": 5.8258223106637014e-08, "logits/chosen": -2.8229074478149414, "logits/rejected": -2.7597951889038086, "logps/chosen": -133.48080444335938, "logps/rejected": -159.08413696289062, "loss": 0.5817, "rewards/accuracies": 0.875, "rewards/chosen": -0.4498964548110962, "rewards/margins": 1.2256112098693848, "rewards/rejected": -1.6755077838897705, "step": 7015 }, { "epoch": 0.81, "learning_rate": 5.8223106637012755e-08, "logits/chosen": -3.1342411041259766, "logits/rejected": -3.2511019706726074, "logps/chosen": -249.70947265625, "logps/rejected": -240.67068481445312, "loss": 0.3103, "rewards/accuracies": 0.875, "rewards/chosen": -0.1765601634979248, "rewards/margins": 2.4823625087738037, "rewards/rejected": -2.6589224338531494, "step": 7016 }, { "epoch": 0.81, "learning_rate": 5.81879901673885e-08, "logits/chosen": -3.510951042175293, "logits/rejected": -3.157930374145508, "logps/chosen": -304.3177490234375, "logps/rejected": -216.09658813476562, "loss": 0.3778, "rewards/accuracies": 0.75, "rewards/chosen": -0.11523973941802979, "rewards/margins": 2.106722116470337, "rewards/rejected": -2.221961736679077, "step": 7017 }, { "epoch": 0.81, "learning_rate": 5.815287369776425e-08, "logits/chosen": -2.5554585456848145, "logits/rejected": -2.37892746925354, "logps/chosen": -240.58502197265625, "logps/rejected": -266.75213623046875, "loss": 0.2241, "rewards/accuracies": 0.875, "rewards/chosen": 0.4494015574455261, "rewards/margins": 2.2332239151000977, "rewards/rejected": -1.7838222980499268, "step": 7018 }, { "epoch": 0.81, "learning_rate": 5.8117757228139996e-08, "logits/chosen": -2.719301700592041, "logits/rejected": -2.9576923847198486, "logps/chosen": -276.9740295410156, "logps/rejected": -332.9812316894531, "loss": 0.2072, "rewards/accuracies": 0.875, "rewards/chosen": 0.4142550230026245, "rewards/margins": 3.1963658332824707, "rewards/rejected": -2.7821106910705566, "step": 7019 }, { "epoch": 0.81, "learning_rate": 5.808264075851574e-08, "logits/chosen": -2.835714817047119, "logits/rejected": -2.7402169704437256, "logps/chosen": -236.01895141601562, "logps/rejected": -228.68280029296875, "loss": 0.5754, "rewards/accuracies": 0.625, "rewards/chosen": -0.0045690275728702545, "rewards/margins": 1.1852267980575562, "rewards/rejected": -1.1897958517074585, "step": 7020 }, { "epoch": 0.81, "learning_rate": 5.804752428889149e-08, "logits/chosen": -3.075382709503174, "logits/rejected": -2.87271785736084, "logps/chosen": -274.2955017089844, "logps/rejected": -283.7748718261719, "loss": 0.4992, "rewards/accuracies": 0.5, "rewards/chosen": -0.22629694640636444, "rewards/margins": 0.9724595546722412, "rewards/rejected": -1.198756456375122, "step": 7021 }, { "epoch": 0.81, "learning_rate": 5.801240781926723e-08, "logits/chosen": -2.9359560012817383, "logits/rejected": -2.6708314418792725, "logps/chosen": -215.87551879882812, "logps/rejected": -177.6206512451172, "loss": 0.397, "rewards/accuracies": 0.875, "rewards/chosen": -0.33864274621009827, "rewards/margins": 1.317594051361084, "rewards/rejected": -1.6562367677688599, "step": 7022 }, { "epoch": 0.81, "learning_rate": 5.797729134964298e-08, "logits/chosen": -3.1327714920043945, "logits/rejected": -2.90097713470459, "logps/chosen": -416.82562255859375, "logps/rejected": -414.0065002441406, "loss": 0.3975, "rewards/accuracies": 0.75, "rewards/chosen": 0.04780092090368271, "rewards/margins": 1.8778820037841797, "rewards/rejected": -1.8300807476043701, "step": 7023 }, { "epoch": 0.81, "learning_rate": 5.7942174880018726e-08, "logits/chosen": -2.749321222305298, "logits/rejected": -2.6748499870300293, "logps/chosen": -230.70370483398438, "logps/rejected": -291.6788024902344, "loss": 0.3914, "rewards/accuracies": 0.875, "rewards/chosen": -0.13591018319129944, "rewards/margins": 1.7908746004104614, "rewards/rejected": -1.9267847537994385, "step": 7024 }, { "epoch": 0.81, "learning_rate": 5.790705841039447e-08, "logits/chosen": -3.303863048553467, "logits/rejected": -3.4979522228240967, "logps/chosen": -270.1163330078125, "logps/rejected": -263.6848449707031, "loss": 0.2674, "rewards/accuracies": 0.75, "rewards/chosen": 0.2693692743778229, "rewards/margins": 2.038541555404663, "rewards/rejected": -1.7691724300384521, "step": 7025 }, { "epoch": 0.81, "learning_rate": 5.7871941940770214e-08, "logits/chosen": -2.8216116428375244, "logits/rejected": -2.847590446472168, "logps/chosen": -215.08717346191406, "logps/rejected": -276.6814880371094, "loss": 0.2254, "rewards/accuracies": 1.0, "rewards/chosen": 0.09741765260696411, "rewards/margins": 3.1318373680114746, "rewards/rejected": -3.0344197750091553, "step": 7026 }, { "epoch": 0.81, "learning_rate": 5.783682547114597e-08, "logits/chosen": -3.912646770477295, "logits/rejected": -3.8543429374694824, "logps/chosen": -236.34674072265625, "logps/rejected": -296.74420166015625, "loss": 0.3249, "rewards/accuracies": 0.75, "rewards/chosen": -0.10732920467853546, "rewards/margins": 2.2522826194763184, "rewards/rejected": -2.359611749649048, "step": 7027 }, { "epoch": 0.81, "learning_rate": 5.780170900152171e-08, "logits/chosen": -3.098193883895874, "logits/rejected": -2.8091702461242676, "logps/chosen": -306.3274230957031, "logps/rejected": -257.2455749511719, "loss": 0.2656, "rewards/accuracies": 0.875, "rewards/chosen": -0.15268273651599884, "rewards/margins": 2.8946781158447266, "rewards/rejected": -3.047360420227051, "step": 7028 }, { "epoch": 0.81, "learning_rate": 5.776659253189746e-08, "logits/chosen": -3.50736665725708, "logits/rejected": -3.6484546661376953, "logps/chosen": -182.63880920410156, "logps/rejected": -234.1070098876953, "loss": 0.1804, "rewards/accuracies": 0.875, "rewards/chosen": 0.43419313430786133, "rewards/margins": 4.1922993659973145, "rewards/rejected": -3.758105993270874, "step": 7029 }, { "epoch": 0.81, "learning_rate": 5.77314760622732e-08, "logits/chosen": -3.3539843559265137, "logits/rejected": -3.6536202430725098, "logps/chosen": -81.39289093017578, "logps/rejected": -188.53878784179688, "loss": 0.3056, "rewards/accuracies": 0.875, "rewards/chosen": -0.0565045103430748, "rewards/margins": 3.0868730545043945, "rewards/rejected": -3.1433777809143066, "step": 7030 }, { "epoch": 0.81, "learning_rate": 5.769635959264895e-08, "logits/chosen": -3.204451560974121, "logits/rejected": -3.3115899562835693, "logps/chosen": -140.57106018066406, "logps/rejected": -300.8236999511719, "loss": 0.3448, "rewards/accuracies": 0.875, "rewards/chosen": 0.012392483651638031, "rewards/margins": 1.391034722328186, "rewards/rejected": -1.378642201423645, "step": 7031 }, { "epoch": 0.81, "learning_rate": 5.76612431230247e-08, "logits/chosen": -3.5460705757141113, "logits/rejected": -4.080048084259033, "logps/chosen": -131.97496032714844, "logps/rejected": -260.0867004394531, "loss": 0.4248, "rewards/accuracies": 0.75, "rewards/chosen": -0.3123064339160919, "rewards/margins": 1.5114145278930664, "rewards/rejected": -1.8237210512161255, "step": 7032 }, { "epoch": 0.81, "learning_rate": 5.7626126653400444e-08, "logits/chosen": -3.5782439708709717, "logits/rejected": -3.3531854152679443, "logps/chosen": -405.2745666503906, "logps/rejected": -454.8512878417969, "loss": 0.1471, "rewards/accuracies": 1.0, "rewards/chosen": -0.0387142114341259, "rewards/margins": 2.743417739868164, "rewards/rejected": -2.782132148742676, "step": 7033 }, { "epoch": 0.81, "learning_rate": 5.7591010183776185e-08, "logits/chosen": -2.707460403442383, "logits/rejected": -2.593702793121338, "logps/chosen": -432.5107727050781, "logps/rejected": -462.3116149902344, "loss": 0.2387, "rewards/accuracies": 1.0, "rewards/chosen": 0.5372724533081055, "rewards/margins": 2.7662675380706787, "rewards/rejected": -2.2289953231811523, "step": 7034 }, { "epoch": 0.81, "learning_rate": 5.755589371415194e-08, "logits/chosen": -3.147020101547241, "logits/rejected": -3.1884093284606934, "logps/chosen": -71.85221099853516, "logps/rejected": -282.9247741699219, "loss": 0.5986, "rewards/accuracies": 0.5, "rewards/chosen": -0.04178383946418762, "rewards/margins": 1.0045280456542969, "rewards/rejected": -1.046311855316162, "step": 7035 }, { "epoch": 0.81, "learning_rate": 5.752077724452768e-08, "logits/chosen": -3.1600430011749268, "logits/rejected": -3.267702341079712, "logps/chosen": -354.5407409667969, "logps/rejected": -251.55972290039062, "loss": 0.1661, "rewards/accuracies": 1.0, "rewards/chosen": 0.8089035749435425, "rewards/margins": 2.563283920288086, "rewards/rejected": -1.7543803453445435, "step": 7036 }, { "epoch": 0.81, "learning_rate": 5.7485660774903433e-08, "logits/chosen": -3.1817052364349365, "logits/rejected": -3.1565184593200684, "logps/chosen": -80.84439086914062, "logps/rejected": -166.3172149658203, "loss": 0.3198, "rewards/accuracies": 0.75, "rewards/chosen": 0.38280996680259705, "rewards/margins": 2.0058820247650146, "rewards/rejected": -1.6230719089508057, "step": 7037 }, { "epoch": 0.81, "learning_rate": 5.7450544305279174e-08, "logits/chosen": -2.54144287109375, "logits/rejected": -2.425483226776123, "logps/chosen": -242.839111328125, "logps/rejected": -261.7880859375, "loss": 0.3703, "rewards/accuracies": 0.75, "rewards/chosen": -0.004092380404472351, "rewards/margins": 2.301875352859497, "rewards/rejected": -2.3059678077697754, "step": 7038 }, { "epoch": 0.81, "learning_rate": 5.741542783565492e-08, "logits/chosen": -3.841797113418579, "logits/rejected": -3.5687458515167236, "logps/chosen": -386.20355224609375, "logps/rejected": -313.2921447753906, "loss": 0.3021, "rewards/accuracies": 0.875, "rewards/chosen": -0.9863862991333008, "rewards/margins": 1.861578106880188, "rewards/rejected": -2.847964286804199, "step": 7039 }, { "epoch": 0.81, "learning_rate": 5.738031136603066e-08, "logits/chosen": -2.8970139026641846, "logits/rejected": -3.2469797134399414, "logps/chosen": -196.3568115234375, "logps/rejected": -218.02694702148438, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": 0.14216555655002594, "rewards/margins": 2.558840751647949, "rewards/rejected": -2.416675329208374, "step": 7040 }, { "epoch": 0.81, "learning_rate": 5.7345194896406416e-08, "logits/chosen": -3.126605987548828, "logits/rejected": -3.1066880226135254, "logps/chosen": -282.1741027832031, "logps/rejected": -166.8070068359375, "loss": 0.2532, "rewards/accuracies": 0.875, "rewards/chosen": 0.4176447093486786, "rewards/margins": 1.9743317365646362, "rewards/rejected": -1.5566872358322144, "step": 7041 }, { "epoch": 0.81, "learning_rate": 5.7310078426782156e-08, "logits/chosen": -3.0865578651428223, "logits/rejected": -3.6900711059570312, "logps/chosen": -153.4751739501953, "logps/rejected": -252.004150390625, "loss": 0.476, "rewards/accuracies": 0.75, "rewards/chosen": -0.4377203583717346, "rewards/margins": 1.9476804733276367, "rewards/rejected": -2.3854007720947266, "step": 7042 }, { "epoch": 0.81, "learning_rate": 5.727496195715791e-08, "logits/chosen": -2.827249765396118, "logits/rejected": -2.659858226776123, "logps/chosen": -267.361083984375, "logps/rejected": -293.8900451660156, "loss": 0.3435, "rewards/accuracies": 0.75, "rewards/chosen": 0.5057290196418762, "rewards/margins": 2.0650031566619873, "rewards/rejected": -1.5592741966247559, "step": 7043 }, { "epoch": 0.81, "learning_rate": 5.723984548753365e-08, "logits/chosen": -3.050323486328125, "logits/rejected": -3.104583740234375, "logps/chosen": -306.00384521484375, "logps/rejected": -311.7962646484375, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": -1.0699100494384766, "rewards/margins": 1.2356595993041992, "rewards/rejected": -2.305569648742676, "step": 7044 }, { "epoch": 0.81, "learning_rate": 5.72047290179094e-08, "logits/chosen": -3.1819331645965576, "logits/rejected": -2.8479716777801514, "logps/chosen": -343.167724609375, "logps/rejected": -222.93482971191406, "loss": 0.3684, "rewards/accuracies": 0.875, "rewards/chosen": 0.14843156933784485, "rewards/margins": 1.267166256904602, "rewards/rejected": -1.11873459815979, "step": 7045 }, { "epoch": 0.81, "learning_rate": 5.7169612548285145e-08, "logits/chosen": -3.110764265060425, "logits/rejected": -2.930696964263916, "logps/chosen": -280.049560546875, "logps/rejected": -333.2024230957031, "loss": 0.2563, "rewards/accuracies": 0.875, "rewards/chosen": -0.03746870905160904, "rewards/margins": 2.4979894161224365, "rewards/rejected": -2.5354583263397217, "step": 7046 }, { "epoch": 0.81, "learning_rate": 5.713449607866089e-08, "logits/chosen": -2.9193787574768066, "logits/rejected": -3.132394313812256, "logps/chosen": -295.9837341308594, "logps/rejected": -485.7255554199219, "loss": 0.5455, "rewards/accuracies": 0.75, "rewards/chosen": -0.5538333654403687, "rewards/margins": 1.6877435445785522, "rewards/rejected": -2.241576910018921, "step": 7047 }, { "epoch": 0.81, "learning_rate": 5.709937960903663e-08, "logits/chosen": -3.5206079483032227, "logits/rejected": -3.667102575302124, "logps/chosen": -158.73484802246094, "logps/rejected": -110.18072509765625, "loss": 0.5939, "rewards/accuracies": 0.625, "rewards/chosen": -0.15645790100097656, "rewards/margins": 0.9980670213699341, "rewards/rejected": -1.1545250415802002, "step": 7048 }, { "epoch": 0.81, "learning_rate": 5.706426313941239e-08, "logits/chosen": -2.5946037769317627, "logits/rejected": -2.6919455528259277, "logps/chosen": -427.58929443359375, "logps/rejected": -314.52093505859375, "loss": 0.3701, "rewards/accuracies": 0.875, "rewards/chosen": 0.36567479372024536, "rewards/margins": 2.1707184314727783, "rewards/rejected": -1.8050434589385986, "step": 7049 }, { "epoch": 0.81, "learning_rate": 5.702914666978813e-08, "logits/chosen": -2.777996778488159, "logits/rejected": -2.699032783508301, "logps/chosen": -264.9035949707031, "logps/rejected": -259.9676513671875, "loss": 0.4568, "rewards/accuracies": 0.875, "rewards/chosen": -0.18916812539100647, "rewards/margins": 1.6577956676483154, "rewards/rejected": -1.8469637632369995, "step": 7050 }, { "epoch": 0.81, "learning_rate": 5.699403020016388e-08, "logits/chosen": -3.391815662384033, "logits/rejected": -3.2201616764068604, "logps/chosen": -405.2630615234375, "logps/rejected": -167.54440307617188, "loss": 0.199, "rewards/accuracies": 1.0, "rewards/chosen": 0.3981601595878601, "rewards/margins": 2.1905150413513184, "rewards/rejected": -1.792354941368103, "step": 7051 }, { "epoch": 0.81, "learning_rate": 5.695891373053962e-08, "logits/chosen": -3.227337598800659, "logits/rejected": -3.163895606994629, "logps/chosen": -180.7281951904297, "logps/rejected": -389.0982971191406, "loss": 0.2694, "rewards/accuracies": 1.0, "rewards/chosen": -0.07126118987798691, "rewards/margins": 1.6756329536437988, "rewards/rejected": -1.7468942403793335, "step": 7052 }, { "epoch": 0.81, "learning_rate": 5.692379726091536e-08, "logits/chosen": -3.6207454204559326, "logits/rejected": -3.599393367767334, "logps/chosen": -406.77947998046875, "logps/rejected": -295.5972595214844, "loss": 0.3406, "rewards/accuracies": 0.875, "rewards/chosen": -0.5041491985321045, "rewards/margins": 1.401402235031128, "rewards/rejected": -1.905551552772522, "step": 7053 }, { "epoch": 0.81, "learning_rate": 5.6888680791291116e-08, "logits/chosen": -3.334864377975464, "logits/rejected": -3.2278780937194824, "logps/chosen": -300.92242431640625, "logps/rejected": -257.08489990234375, "loss": 0.6339, "rewards/accuracies": 0.625, "rewards/chosen": -0.49210023880004883, "rewards/margins": 1.449122428894043, "rewards/rejected": -1.9412225484848022, "step": 7054 }, { "epoch": 0.81, "learning_rate": 5.685356432166686e-08, "logits/chosen": -3.036513566970825, "logits/rejected": -2.95225191116333, "logps/chosen": -261.46881103515625, "logps/rejected": -257.7674255371094, "loss": 0.4225, "rewards/accuracies": 0.875, "rewards/chosen": -0.41657161712646484, "rewards/margins": 1.2301901578903198, "rewards/rejected": -1.6467617750167847, "step": 7055 }, { "epoch": 0.81, "learning_rate": 5.6818447852042604e-08, "logits/chosen": -2.8878931999206543, "logits/rejected": -3.1583895683288574, "logps/chosen": -276.63275146484375, "logps/rejected": -254.91697692871094, "loss": 0.3093, "rewards/accuracies": 0.75, "rewards/chosen": 0.7066150903701782, "rewards/margins": 2.489630937576294, "rewards/rejected": -1.7830158472061157, "step": 7056 }, { "epoch": 0.81, "learning_rate": 5.6783331382418345e-08, "logits/chosen": -3.782564878463745, "logits/rejected": -3.586973190307617, "logps/chosen": -97.64024353027344, "logps/rejected": -116.10865783691406, "loss": 0.5069, "rewards/accuracies": 0.875, "rewards/chosen": -0.28266292810440063, "rewards/margins": 1.0339677333831787, "rewards/rejected": -1.3166306018829346, "step": 7057 }, { "epoch": 0.81, "learning_rate": 5.67482149127941e-08, "logits/chosen": -2.8402411937713623, "logits/rejected": -2.9018378257751465, "logps/chosen": -227.19827270507812, "logps/rejected": -317.8902587890625, "loss": 0.2066, "rewards/accuracies": 1.0, "rewards/chosen": -0.3257594108581543, "rewards/margins": 2.291743040084839, "rewards/rejected": -2.617502450942993, "step": 7058 }, { "epoch": 0.81, "learning_rate": 5.671309844316984e-08, "logits/chosen": -3.6081953048706055, "logits/rejected": -3.2927210330963135, "logps/chosen": -306.507080078125, "logps/rejected": -231.87355041503906, "loss": 0.3894, "rewards/accuracies": 0.875, "rewards/chosen": 0.3053259253501892, "rewards/margins": 1.331560492515564, "rewards/rejected": -1.02623450756073, "step": 7059 }, { "epoch": 0.81, "learning_rate": 5.667798197354559e-08, "logits/chosen": -3.3207836151123047, "logits/rejected": -3.1705055236816406, "logps/chosen": -468.75201416015625, "logps/rejected": -237.28713989257812, "loss": 0.3831, "rewards/accuracies": 0.875, "rewards/chosen": 0.4280315637588501, "rewards/margins": 1.8350305557250977, "rewards/rejected": -1.406998872756958, "step": 7060 }, { "epoch": 0.81, "learning_rate": 5.6642865503921334e-08, "logits/chosen": -3.359374761581421, "logits/rejected": -3.183685064315796, "logps/chosen": -282.3336181640625, "logps/rejected": -287.80010986328125, "loss": 0.5238, "rewards/accuracies": 0.625, "rewards/chosen": 0.2787518799304962, "rewards/margins": 1.3831183910369873, "rewards/rejected": -1.1043665409088135, "step": 7061 }, { "epoch": 0.81, "learning_rate": 5.660774903429708e-08, "logits/chosen": -3.4322497844696045, "logits/rejected": -3.386507034301758, "logps/chosen": -255.1927490234375, "logps/rejected": -295.40985107421875, "loss": 0.3453, "rewards/accuracies": 0.75, "rewards/chosen": 0.19994470477104187, "rewards/margins": 3.092789888381958, "rewards/rejected": -2.8928451538085938, "step": 7062 }, { "epoch": 0.81, "learning_rate": 5.657263256467283e-08, "logits/chosen": -3.438406467437744, "logits/rejected": -3.175628900527954, "logps/chosen": -251.47628784179688, "logps/rejected": -294.6208190917969, "loss": 0.3252, "rewards/accuracies": 0.75, "rewards/chosen": -0.4550783038139343, "rewards/margins": 2.113854169845581, "rewards/rejected": -2.56893253326416, "step": 7063 }, { "epoch": 0.81, "learning_rate": 5.6537516095048576e-08, "logits/chosen": -3.1073198318481445, "logits/rejected": -3.1878671646118164, "logps/chosen": -341.9613037109375, "logps/rejected": -292.1611328125, "loss": 0.4168, "rewards/accuracies": 0.75, "rewards/chosen": 0.14789438247680664, "rewards/margins": 1.339041829109192, "rewards/rejected": -1.1911474466323853, "step": 7064 }, { "epoch": 0.81, "learning_rate": 5.6502399625424316e-08, "logits/chosen": -3.2322499752044678, "logits/rejected": -3.1525685787200928, "logps/chosen": -230.96800231933594, "logps/rejected": -400.3896484375, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": 0.08606119453907013, "rewards/margins": 2.494959831237793, "rewards/rejected": -2.4088988304138184, "step": 7065 }, { "epoch": 0.81, "learning_rate": 5.646728315580007e-08, "logits/chosen": -2.995748996734619, "logits/rejected": -2.8926000595092773, "logps/chosen": -176.25741577148438, "logps/rejected": -110.837646484375, "loss": 0.639, "rewards/accuracies": 0.625, "rewards/chosen": -0.6685460805892944, "rewards/margins": 0.40451210737228394, "rewards/rejected": -1.0730582475662231, "step": 7066 }, { "epoch": 0.81, "learning_rate": 5.643216668617581e-08, "logits/chosen": -2.9526665210723877, "logits/rejected": -2.7665019035339355, "logps/chosen": -206.21615600585938, "logps/rejected": -306.4855651855469, "loss": 0.6685, "rewards/accuracies": 0.5, "rewards/chosen": -0.3203727602958679, "rewards/margins": 0.38039350509643555, "rewards/rejected": -0.7007662653923035, "step": 7067 }, { "epoch": 0.81, "learning_rate": 5.6397050216551565e-08, "logits/chosen": -2.9689197540283203, "logits/rejected": -2.6862335205078125, "logps/chosen": -215.55511474609375, "logps/rejected": -297.5643005371094, "loss": 1.1776, "rewards/accuracies": 0.375, "rewards/chosen": -0.9492173790931702, "rewards/margins": -0.48783284425735474, "rewards/rejected": -0.46138453483581543, "step": 7068 }, { "epoch": 0.81, "learning_rate": 5.6361933746927305e-08, "logits/chosen": -3.0212974548339844, "logits/rejected": -3.2587404251098633, "logps/chosen": -154.463623046875, "logps/rejected": -272.8995666503906, "loss": 0.2192, "rewards/accuracies": 0.875, "rewards/chosen": 0.5217602849006653, "rewards/margins": 2.1935935020446777, "rewards/rejected": -1.6718332767486572, "step": 7069 }, { "epoch": 0.82, "learning_rate": 5.632681727730305e-08, "logits/chosen": -3.0041098594665527, "logits/rejected": -2.9872727394104004, "logps/chosen": -169.76791381835938, "logps/rejected": -142.58639526367188, "loss": 0.3135, "rewards/accuracies": 0.875, "rewards/chosen": 0.2464982569217682, "rewards/margins": 1.506847858428955, "rewards/rejected": -1.2603496313095093, "step": 7070 }, { "epoch": 0.82, "learning_rate": 5.629170080767879e-08, "logits/chosen": -3.275524139404297, "logits/rejected": -3.119741201400757, "logps/chosen": -257.7836608886719, "logps/rejected": -196.55044555664062, "loss": 0.5916, "rewards/accuracies": 0.75, "rewards/chosen": -0.17790032923221588, "rewards/margins": 2.461857318878174, "rewards/rejected": -2.6397576332092285, "step": 7071 }, { "epoch": 0.82, "learning_rate": 5.625658433805455e-08, "logits/chosen": -3.1378724575042725, "logits/rejected": -3.2077736854553223, "logps/chosen": -247.58306884765625, "logps/rejected": -176.12905883789062, "loss": 0.4589, "rewards/accuracies": 0.875, "rewards/chosen": 0.1516421139240265, "rewards/margins": 1.2360289096832275, "rewards/rejected": -1.0843868255615234, "step": 7072 }, { "epoch": 0.82, "learning_rate": 5.622146786843029e-08, "logits/chosen": -2.735647201538086, "logits/rejected": -2.6106834411621094, "logps/chosen": -338.3728332519531, "logps/rejected": -319.166015625, "loss": 0.2904, "rewards/accuracies": 1.0, "rewards/chosen": -0.06503213196992874, "rewards/margins": 1.5332378149032593, "rewards/rejected": -1.5982699394226074, "step": 7073 }, { "epoch": 0.82, "learning_rate": 5.618635139880604e-08, "logits/chosen": -2.7515780925750732, "logits/rejected": -2.9231112003326416, "logps/chosen": -324.6691589355469, "logps/rejected": -332.10443115234375, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": 0.21707721054553986, "rewards/margins": 2.3996899127960205, "rewards/rejected": -2.182612657546997, "step": 7074 }, { "epoch": 0.82, "learning_rate": 5.615123492918178e-08, "logits/chosen": -3.1166272163391113, "logits/rejected": -3.191347360610962, "logps/chosen": -201.09896850585938, "logps/rejected": -209.6114044189453, "loss": 0.4058, "rewards/accuracies": 0.875, "rewards/chosen": 0.21603630483150482, "rewards/margins": 2.0323991775512695, "rewards/rejected": -1.816362977027893, "step": 7075 }, { "epoch": 0.82, "learning_rate": 5.611611845955753e-08, "logits/chosen": -3.6852574348449707, "logits/rejected": -3.6955854892730713, "logps/chosen": -197.50306701660156, "logps/rejected": -266.58428955078125, "loss": 0.4192, "rewards/accuracies": 0.875, "rewards/chosen": -0.5733964443206787, "rewards/margins": 1.1692750453948975, "rewards/rejected": -1.7426716089248657, "step": 7076 }, { "epoch": 0.82, "learning_rate": 5.6081001989933276e-08, "logits/chosen": -2.519528388977051, "logits/rejected": -2.7924022674560547, "logps/chosen": -439.73358154296875, "logps/rejected": -240.33419799804688, "loss": 0.6863, "rewards/accuracies": 0.5, "rewards/chosen": 0.010356217622756958, "rewards/margins": 0.5724209547042847, "rewards/rejected": -0.5620647668838501, "step": 7077 }, { "epoch": 0.82, "learning_rate": 5.6045885520309024e-08, "logits/chosen": -2.952589273452759, "logits/rejected": -2.894784450531006, "logps/chosen": -248.90403747558594, "logps/rejected": -184.84259033203125, "loss": 0.4159, "rewards/accuracies": 0.875, "rewards/chosen": -0.2236766666173935, "rewards/margins": 0.8994431495666504, "rewards/rejected": -1.1231197118759155, "step": 7078 }, { "epoch": 0.82, "learning_rate": 5.6010769050684764e-08, "logits/chosen": -3.8146562576293945, "logits/rejected": -3.951063394546509, "logps/chosen": -301.46038818359375, "logps/rejected": -192.5223846435547, "loss": 0.5273, "rewards/accuracies": 0.625, "rewards/chosen": -0.269353985786438, "rewards/margins": 1.4656658172607422, "rewards/rejected": -1.7350196838378906, "step": 7079 }, { "epoch": 0.82, "learning_rate": 5.597565258106052e-08, "logits/chosen": -2.900376319885254, "logits/rejected": -2.872828722000122, "logps/chosen": -250.26870727539062, "logps/rejected": -216.10549926757812, "loss": 0.4067, "rewards/accuracies": 0.75, "rewards/chosen": -0.5317414999008179, "rewards/margins": 3.1164350509643555, "rewards/rejected": -3.648176670074463, "step": 7080 }, { "epoch": 0.82, "learning_rate": 5.594053611143626e-08, "logits/chosen": -3.8526978492736816, "logits/rejected": -3.902538537979126, "logps/chosen": -370.6311950683594, "logps/rejected": -410.0972900390625, "loss": 0.8254, "rewards/accuracies": 0.625, "rewards/chosen": -0.6767392754554749, "rewards/margins": 1.161439061164856, "rewards/rejected": -1.8381781578063965, "step": 7081 }, { "epoch": 0.82, "learning_rate": 5.590541964181201e-08, "logits/chosen": -2.8272626399993896, "logits/rejected": -2.8615105152130127, "logps/chosen": -685.072021484375, "logps/rejected": -641.4850463867188, "loss": 0.5283, "rewards/accuracies": 0.625, "rewards/chosen": -0.45681530237197876, "rewards/margins": 1.2376024723052979, "rewards/rejected": -1.6944177150726318, "step": 7082 }, { "epoch": 0.82, "learning_rate": 5.587030317218775e-08, "logits/chosen": -3.2007060050964355, "logits/rejected": -3.1233367919921875, "logps/chosen": -462.0911865234375, "logps/rejected": -291.7182312011719, "loss": 0.3886, "rewards/accuracies": 0.75, "rewards/chosen": -0.2548060417175293, "rewards/margins": 1.5906261205673218, "rewards/rejected": -1.845432162284851, "step": 7083 }, { "epoch": 0.82, "learning_rate": 5.58351867025635e-08, "logits/chosen": -3.1351892948150635, "logits/rejected": -3.1308822631835938, "logps/chosen": -281.2229309082031, "logps/rejected": -290.6504211425781, "loss": 0.406, "rewards/accuracies": 0.875, "rewards/chosen": -0.695325493812561, "rewards/margins": 1.2385369539260864, "rewards/rejected": -1.9338624477386475, "step": 7084 }, { "epoch": 0.82, "learning_rate": 5.580007023293925e-08, "logits/chosen": -2.4682297706604004, "logits/rejected": -2.5058419704437256, "logps/chosen": -317.8101501464844, "logps/rejected": -314.1389465332031, "loss": 0.4079, "rewards/accuracies": 0.875, "rewards/chosen": 0.00435948371887207, "rewards/margins": 1.0696160793304443, "rewards/rejected": -1.0652565956115723, "step": 7085 }, { "epoch": 0.82, "learning_rate": 5.5764953763314995e-08, "logits/chosen": -3.4052867889404297, "logits/rejected": -3.580050230026245, "logps/chosen": -256.4182434082031, "logps/rejected": -269.349365234375, "loss": 0.183, "rewards/accuracies": 1.0, "rewards/chosen": 0.12558375298976898, "rewards/margins": 2.2278125286102295, "rewards/rejected": -2.1022286415100098, "step": 7086 }, { "epoch": 0.82, "learning_rate": 5.5729837293690736e-08, "logits/chosen": -2.6451597213745117, "logits/rejected": -2.76731538772583, "logps/chosen": -283.95550537109375, "logps/rejected": -195.28419494628906, "loss": 0.5736, "rewards/accuracies": 0.75, "rewards/chosen": -0.0431140661239624, "rewards/margins": 1.1461060047149658, "rewards/rejected": -1.1892200708389282, "step": 7087 }, { "epoch": 0.82, "learning_rate": 5.569472082406649e-08, "logits/chosen": -2.9680416584014893, "logits/rejected": -2.699769973754883, "logps/chosen": -234.42343139648438, "logps/rejected": -245.4241943359375, "loss": 0.3785, "rewards/accuracies": 0.75, "rewards/chosen": 0.15280485153198242, "rewards/margins": 1.6252148151397705, "rewards/rejected": -1.472409963607788, "step": 7088 }, { "epoch": 0.82, "learning_rate": 5.565960435444223e-08, "logits/chosen": -3.1271395683288574, "logits/rejected": -2.9621267318725586, "logps/chosen": -193.8024139404297, "logps/rejected": -215.2017364501953, "loss": 0.8313, "rewards/accuracies": 0.75, "rewards/chosen": 0.16300687193870544, "rewards/margins": 1.9391807317733765, "rewards/rejected": -1.7761738300323486, "step": 7089 }, { "epoch": 0.82, "learning_rate": 5.562448788481798e-08, "logits/chosen": -2.6596081256866455, "logits/rejected": -2.748577117919922, "logps/chosen": -313.57952880859375, "logps/rejected": -202.201171875, "loss": 0.3118, "rewards/accuracies": 0.875, "rewards/chosen": 0.08566740900278091, "rewards/margins": 1.4105364084243774, "rewards/rejected": -1.3248690366744995, "step": 7090 }, { "epoch": 0.82, "learning_rate": 5.5589371415193725e-08, "logits/chosen": -3.806964159011841, "logits/rejected": -3.5072247982025146, "logps/chosen": -234.96473693847656, "logps/rejected": -151.3390655517578, "loss": 0.3696, "rewards/accuracies": 0.75, "rewards/chosen": -0.1668606698513031, "rewards/margins": 1.820281744003296, "rewards/rejected": -1.9871424436569214, "step": 7091 }, { "epoch": 0.82, "learning_rate": 5.555425494556947e-08, "logits/chosen": -2.7330214977264404, "logits/rejected": -3.023786783218384, "logps/chosen": -213.3662567138672, "logps/rejected": -260.3422546386719, "loss": 0.3295, "rewards/accuracies": 0.875, "rewards/chosen": -0.3202010989189148, "rewards/margins": 1.8877284526824951, "rewards/rejected": -2.2079296112060547, "step": 7092 }, { "epoch": 0.82, "learning_rate": 5.551913847594521e-08, "logits/chosen": -2.5855352878570557, "logits/rejected": -2.600278854370117, "logps/chosen": -253.9904327392578, "logps/rejected": -192.25364685058594, "loss": 0.733, "rewards/accuracies": 0.5, "rewards/chosen": -0.6619327664375305, "rewards/margins": 0.7816194295883179, "rewards/rejected": -1.4435522556304932, "step": 7093 }, { "epoch": 0.82, "learning_rate": 5.5484022006320966e-08, "logits/chosen": -2.436964988708496, "logits/rejected": -2.318171739578247, "logps/chosen": -299.86834716796875, "logps/rejected": -215.6698760986328, "loss": 0.2903, "rewards/accuracies": 0.875, "rewards/chosen": -0.6762658953666687, "rewards/margins": 1.7401716709136963, "rewards/rejected": -2.4164376258850098, "step": 7094 }, { "epoch": 0.82, "learning_rate": 5.544890553669671e-08, "logits/chosen": -3.92901611328125, "logits/rejected": -3.7500557899475098, "logps/chosen": -140.6545867919922, "logps/rejected": -180.45822143554688, "loss": 0.208, "rewards/accuracies": 1.0, "rewards/chosen": 0.10577109456062317, "rewards/margins": 2.3322598934173584, "rewards/rejected": -2.2264890670776367, "step": 7095 }, { "epoch": 0.82, "learning_rate": 5.541378906707246e-08, "logits/chosen": -3.7872354984283447, "logits/rejected": -3.5539309978485107, "logps/chosen": -358.5177917480469, "logps/rejected": -232.00234985351562, "loss": 0.2649, "rewards/accuracies": 0.875, "rewards/chosen": -0.2504768669605255, "rewards/margins": 2.7441132068634033, "rewards/rejected": -2.9945898056030273, "step": 7096 }, { "epoch": 0.82, "learning_rate": 5.53786725974482e-08, "logits/chosen": -3.2905468940734863, "logits/rejected": -3.0145328044891357, "logps/chosen": -206.1131134033203, "logps/rejected": -196.57994079589844, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": -0.11245220899581909, "rewards/margins": 1.570654273033142, "rewards/rejected": -1.6831063032150269, "step": 7097 }, { "epoch": 0.82, "learning_rate": 5.534355612782395e-08, "logits/chosen": -3.4214577674865723, "logits/rejected": -3.5702950954437256, "logps/chosen": -280.1068420410156, "logps/rejected": -238.83953857421875, "loss": 0.2664, "rewards/accuracies": 0.875, "rewards/chosen": -0.36264070868492126, "rewards/margins": 1.8021775484085083, "rewards/rejected": -2.164818286895752, "step": 7098 }, { "epoch": 0.82, "learning_rate": 5.5308439658199696e-08, "logits/chosen": -3.536156177520752, "logits/rejected": -3.0974879264831543, "logps/chosen": -346.6443176269531, "logps/rejected": -288.46331787109375, "loss": 0.5007, "rewards/accuracies": 0.875, "rewards/chosen": -0.4743553400039673, "rewards/margins": 1.0005881786346436, "rewards/rejected": -1.4749436378479004, "step": 7099 }, { "epoch": 0.82, "learning_rate": 5.5273323188575436e-08, "logits/chosen": -3.426936149597168, "logits/rejected": -3.062103033065796, "logps/chosen": -289.6976318359375, "logps/rejected": -211.37982177734375, "loss": 0.3313, "rewards/accuracies": 0.875, "rewards/chosen": -0.035894013941287994, "rewards/margins": 1.4520708322525024, "rewards/rejected": -1.4879648685455322, "step": 7100 }, { "epoch": 0.82, "learning_rate": 5.5238206718951184e-08, "logits/chosen": -3.3491029739379883, "logits/rejected": -3.363377094268799, "logps/chosen": -100.10203552246094, "logps/rejected": -208.3076629638672, "loss": 0.4727, "rewards/accuracies": 0.625, "rewards/chosen": -0.1137009933590889, "rewards/margins": 1.5027084350585938, "rewards/rejected": -1.616409420967102, "step": 7101 }, { "epoch": 0.82, "learning_rate": 5.520309024932693e-08, "logits/chosen": -3.3375144004821777, "logits/rejected": -3.3835184574127197, "logps/chosen": -223.378173828125, "logps/rejected": -188.72727966308594, "loss": 0.6059, "rewards/accuracies": 0.625, "rewards/chosen": -0.48113158345222473, "rewards/margins": 0.9668976664543152, "rewards/rejected": -1.4480292797088623, "step": 7102 }, { "epoch": 0.82, "learning_rate": 5.516797377970268e-08, "logits/chosen": -3.001511573791504, "logits/rejected": -3.3444631099700928, "logps/chosen": -189.11050415039062, "logps/rejected": -207.21458435058594, "loss": 0.3631, "rewards/accuracies": 0.875, "rewards/chosen": 0.11339936405420303, "rewards/margins": 2.3371145725250244, "rewards/rejected": -2.22371506690979, "step": 7103 }, { "epoch": 0.82, "learning_rate": 5.513285731007842e-08, "logits/chosen": -4.2181525230407715, "logits/rejected": -4.147733211517334, "logps/chosen": -204.74851989746094, "logps/rejected": -289.3227844238281, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.14262992143630981, "rewards/margins": 0.936086893081665, "rewards/rejected": -1.07871675491333, "step": 7104 }, { "epoch": 0.82, "learning_rate": 5.509774084045417e-08, "logits/chosen": -3.318091630935669, "logits/rejected": -3.257157802581787, "logps/chosen": -185.50595092773438, "logps/rejected": -183.26112365722656, "loss": 0.2922, "rewards/accuracies": 0.875, "rewards/chosen": 0.23624268174171448, "rewards/margins": 2.1310958862304688, "rewards/rejected": -1.894853115081787, "step": 7105 }, { "epoch": 0.82, "learning_rate": 5.506262437082991e-08, "logits/chosen": -2.317072629928589, "logits/rejected": -2.347303867340088, "logps/chosen": -384.7866516113281, "logps/rejected": -403.66741943359375, "loss": 0.2828, "rewards/accuracies": 1.0, "rewards/chosen": 0.08356037735939026, "rewards/margins": 1.3286412954330444, "rewards/rejected": -1.2450810670852661, "step": 7106 }, { "epoch": 0.82, "learning_rate": 5.502750790120566e-08, "logits/chosen": -3.4087719917297363, "logits/rejected": -3.592648506164551, "logps/chosen": -241.17202758789062, "logps/rejected": -239.26170349121094, "loss": 0.5878, "rewards/accuracies": 0.5, "rewards/chosen": -0.24762877821922302, "rewards/margins": 1.3556208610534668, "rewards/rejected": -1.6032495498657227, "step": 7107 }, { "epoch": 0.82, "learning_rate": 5.499239143158141e-08, "logits/chosen": -3.2962396144866943, "logits/rejected": -3.17842960357666, "logps/chosen": -261.74749755859375, "logps/rejected": -230.29367065429688, "loss": 0.2935, "rewards/accuracies": 0.875, "rewards/chosen": -0.393573522567749, "rewards/margins": 1.8459275960922241, "rewards/rejected": -2.2395009994506836, "step": 7108 }, { "epoch": 0.82, "learning_rate": 5.4957274961957155e-08, "logits/chosen": -3.3644227981567383, "logits/rejected": -3.234234571456909, "logps/chosen": -333.8115539550781, "logps/rejected": -214.1597137451172, "loss": 0.376, "rewards/accuracies": 0.75, "rewards/chosen": -0.5869728326797485, "rewards/margins": 1.588766098022461, "rewards/rejected": -2.175739049911499, "step": 7109 }, { "epoch": 0.82, "learning_rate": 5.4922158492332896e-08, "logits/chosen": -3.1037120819091797, "logits/rejected": -2.7979791164398193, "logps/chosen": -191.82717895507812, "logps/rejected": -261.0047302246094, "loss": 0.4103, "rewards/accuracies": 0.875, "rewards/chosen": -0.6177102327346802, "rewards/margins": 1.1065542697906494, "rewards/rejected": -1.7242646217346191, "step": 7110 }, { "epoch": 0.82, "learning_rate": 5.488704202270865e-08, "logits/chosen": -2.9968347549438477, "logits/rejected": -3.0185165405273438, "logps/chosen": -320.313232421875, "logps/rejected": -307.19061279296875, "loss": 0.3367, "rewards/accuracies": 0.75, "rewards/chosen": 0.16595155000686646, "rewards/margins": 2.8731255531311035, "rewards/rejected": -2.707174062728882, "step": 7111 }, { "epoch": 0.82, "learning_rate": 5.485192555308439e-08, "logits/chosen": -3.476222276687622, "logits/rejected": -3.2149643898010254, "logps/chosen": -275.4999084472656, "logps/rejected": -322.5150146484375, "loss": 0.3746, "rewards/accuracies": 0.875, "rewards/chosen": -0.2037954330444336, "rewards/margins": 0.9272400140762329, "rewards/rejected": -1.1310354471206665, "step": 7112 }, { "epoch": 0.82, "learning_rate": 5.4816809083460144e-08, "logits/chosen": -3.3259406089782715, "logits/rejected": -3.5277209281921387, "logps/chosen": -128.9437713623047, "logps/rejected": -287.9943542480469, "loss": 0.4801, "rewards/accuracies": 0.75, "rewards/chosen": -0.02124711498618126, "rewards/margins": 1.0774606466293335, "rewards/rejected": -1.0987077951431274, "step": 7113 }, { "epoch": 0.82, "learning_rate": 5.4781692613835885e-08, "logits/chosen": -2.591050148010254, "logits/rejected": -2.9913177490234375, "logps/chosen": -509.6435546875, "logps/rejected": -333.4090270996094, "loss": 0.3701, "rewards/accuracies": 0.875, "rewards/chosen": 0.15163275599479675, "rewards/margins": 1.787489891052246, "rewards/rejected": -1.635857105255127, "step": 7114 }, { "epoch": 0.82, "learning_rate": 5.474657614421163e-08, "logits/chosen": -3.7225074768066406, "logits/rejected": -3.3928699493408203, "logps/chosen": -327.8751220703125, "logps/rejected": -282.6995849609375, "loss": 0.3936, "rewards/accuracies": 1.0, "rewards/chosen": -0.4192380905151367, "rewards/margins": 1.2128431797027588, "rewards/rejected": -1.6320812702178955, "step": 7115 }, { "epoch": 0.82, "learning_rate": 5.471145967458738e-08, "logits/chosen": -3.586422920227051, "logits/rejected": -3.3939459323883057, "logps/chosen": -266.06298828125, "logps/rejected": -253.4732666015625, "loss": 0.2199, "rewards/accuracies": 1.0, "rewards/chosen": -0.08766976743936539, "rewards/margins": 2.016558885574341, "rewards/rejected": -2.1042287349700928, "step": 7116 }, { "epoch": 0.82, "learning_rate": 5.4676343204963126e-08, "logits/chosen": -2.8334364891052246, "logits/rejected": -3.1148083209991455, "logps/chosen": -202.23394775390625, "logps/rejected": -258.0196838378906, "loss": 0.5485, "rewards/accuracies": 0.625, "rewards/chosen": -0.14447063207626343, "rewards/margins": 1.9710063934326172, "rewards/rejected": -2.1154768466949463, "step": 7117 }, { "epoch": 0.82, "learning_rate": 5.464122673533887e-08, "logits/chosen": -2.603865623474121, "logits/rejected": -2.776315927505493, "logps/chosen": -342.0797119140625, "logps/rejected": -309.4161071777344, "loss": 0.3552, "rewards/accuracies": 0.875, "rewards/chosen": -0.5659746527671814, "rewards/margins": 2.0174472332000732, "rewards/rejected": -2.5834217071533203, "step": 7118 }, { "epoch": 0.82, "learning_rate": 5.460611026571462e-08, "logits/chosen": -3.1425440311431885, "logits/rejected": -3.1078341007232666, "logps/chosen": -220.35784912109375, "logps/rejected": -158.66111755371094, "loss": 1.0974, "rewards/accuracies": 0.5, "rewards/chosen": -0.9560684561729431, "rewards/margins": 0.19644419848918915, "rewards/rejected": -1.152512550354004, "step": 7119 }, { "epoch": 0.82, "learning_rate": 5.457099379609036e-08, "logits/chosen": -3.786813735961914, "logits/rejected": -3.804370403289795, "logps/chosen": -376.12359619140625, "logps/rejected": -353.0893859863281, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": 0.18266546726226807, "rewards/margins": 2.940398693084717, "rewards/rejected": -2.7577333450317383, "step": 7120 }, { "epoch": 0.82, "learning_rate": 5.4535877326466115e-08, "logits/chosen": -2.7635393142700195, "logits/rejected": -2.6908092498779297, "logps/chosen": -477.5245361328125, "logps/rejected": -335.4281921386719, "loss": 0.7203, "rewards/accuracies": 0.75, "rewards/chosen": 0.20364710688591003, "rewards/margins": 1.1700901985168457, "rewards/rejected": -0.9664431214332581, "step": 7121 }, { "epoch": 0.82, "learning_rate": 5.4500760856841856e-08, "logits/chosen": -2.2166295051574707, "logits/rejected": -2.214165210723877, "logps/chosen": -179.15115356445312, "logps/rejected": -159.06268310546875, "loss": 0.7361, "rewards/accuracies": 0.625, "rewards/chosen": -0.21381348371505737, "rewards/margins": 0.5099722146987915, "rewards/rejected": -0.7237857580184937, "step": 7122 }, { "epoch": 0.82, "learning_rate": 5.44656443872176e-08, "logits/chosen": -3.256969451904297, "logits/rejected": -3.2838261127471924, "logps/chosen": -142.9159393310547, "logps/rejected": -159.37301635742188, "loss": 0.5007, "rewards/accuracies": 0.75, "rewards/chosen": -0.7400006055831909, "rewards/margins": 1.1559216976165771, "rewards/rejected": -1.895922303199768, "step": 7123 }, { "epoch": 0.82, "learning_rate": 5.4430527917593344e-08, "logits/chosen": -3.1001839637756348, "logits/rejected": -2.84311842918396, "logps/chosen": -221.5218963623047, "logps/rejected": -209.4567413330078, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -0.4193398356437683, "rewards/margins": 0.5012933611869812, "rewards/rejected": -0.9206331968307495, "step": 7124 }, { "epoch": 0.82, "learning_rate": 5.43954114479691e-08, "logits/chosen": -3.7675180435180664, "logits/rejected": -3.6534531116485596, "logps/chosen": -140.07489013671875, "logps/rejected": -137.56500244140625, "loss": 0.3777, "rewards/accuracies": 0.75, "rewards/chosen": 0.3066025972366333, "rewards/margins": 2.194904327392578, "rewards/rejected": -1.8883017301559448, "step": 7125 }, { "epoch": 0.82, "learning_rate": 5.436029497834484e-08, "logits/chosen": -2.5257420539855957, "logits/rejected": -2.8547449111938477, "logps/chosen": -196.67913818359375, "logps/rejected": -252.99290466308594, "loss": 0.3526, "rewards/accuracies": 0.75, "rewards/chosen": 0.05926968902349472, "rewards/margins": 2.667520046234131, "rewards/rejected": -2.608250617980957, "step": 7126 }, { "epoch": 0.82, "learning_rate": 5.432517850872059e-08, "logits/chosen": -3.7101993560791016, "logits/rejected": -3.991537094116211, "logps/chosen": -174.7431182861328, "logps/rejected": -246.19772338867188, "loss": 0.2321, "rewards/accuracies": 1.0, "rewards/chosen": 0.03859903663396835, "rewards/margins": 1.957377552986145, "rewards/rejected": -1.9187785387039185, "step": 7127 }, { "epoch": 0.82, "learning_rate": 5.429006203909633e-08, "logits/chosen": -2.4049227237701416, "logits/rejected": -2.3480963706970215, "logps/chosen": -274.2750244140625, "logps/rejected": -314.7406311035156, "loss": 0.4021, "rewards/accuracies": 0.875, "rewards/chosen": -0.0894150510430336, "rewards/margins": 1.4254539012908936, "rewards/rejected": -1.514868974685669, "step": 7128 }, { "epoch": 0.82, "learning_rate": 5.425494556947208e-08, "logits/chosen": -3.3813998699188232, "logits/rejected": -3.408198833465576, "logps/chosen": -116.60267639160156, "logps/rejected": -209.93798828125, "loss": 0.5026, "rewards/accuracies": 0.875, "rewards/chosen": -0.1124332994222641, "rewards/margins": 1.3458800315856934, "rewards/rejected": -1.4583134651184082, "step": 7129 }, { "epoch": 0.82, "learning_rate": 5.421982909984783e-08, "logits/chosen": -3.2054264545440674, "logits/rejected": -2.9508614540100098, "logps/chosen": -229.07440185546875, "logps/rejected": -150.50164794921875, "loss": 0.4218, "rewards/accuracies": 0.75, "rewards/chosen": -0.5230340361595154, "rewards/margins": 1.2570263147354126, "rewards/rejected": -1.7800602912902832, "step": 7130 }, { "epoch": 0.82, "learning_rate": 5.4184712630223574e-08, "logits/chosen": -3.1950016021728516, "logits/rejected": -3.2600908279418945, "logps/chosen": -181.82037353515625, "logps/rejected": -197.30224609375, "loss": 0.382, "rewards/accuracies": 0.875, "rewards/chosen": -0.20639705657958984, "rewards/margins": 1.3572319746017456, "rewards/rejected": -1.563629150390625, "step": 7131 }, { "epoch": 0.82, "learning_rate": 5.4149596160599315e-08, "logits/chosen": -2.3831636905670166, "logits/rejected": -2.250735282897949, "logps/chosen": -372.8475341796875, "logps/rejected": -401.07257080078125, "loss": 0.49, "rewards/accuracies": 0.625, "rewards/chosen": -0.20846614241600037, "rewards/margins": 0.998725175857544, "rewards/rejected": -1.2071913480758667, "step": 7132 }, { "epoch": 0.82, "learning_rate": 5.411447969097507e-08, "logits/chosen": -3.5911002159118652, "logits/rejected": -3.5294382572174072, "logps/chosen": -217.8195037841797, "logps/rejected": -245.01553344726562, "loss": 0.2001, "rewards/accuracies": 0.875, "rewards/chosen": 0.38681507110595703, "rewards/margins": 3.208322048187256, "rewards/rejected": -2.821506977081299, "step": 7133 }, { "epoch": 0.82, "learning_rate": 5.407936322135081e-08, "logits/chosen": -2.926800489425659, "logits/rejected": -3.1174659729003906, "logps/chosen": -552.4227294921875, "logps/rejected": -190.94989013671875, "loss": 0.3774, "rewards/accuracies": 0.75, "rewards/chosen": 0.29180508852005005, "rewards/margins": 2.3337221145629883, "rewards/rejected": -2.041917085647583, "step": 7134 }, { "epoch": 0.82, "learning_rate": 5.4044246751726563e-08, "logits/chosen": -2.7444968223571777, "logits/rejected": -2.9273557662963867, "logps/chosen": -355.1973571777344, "logps/rejected": -276.47833251953125, "loss": 0.1702, "rewards/accuracies": 0.875, "rewards/chosen": -0.12823784351348877, "rewards/margins": 4.171582221984863, "rewards/rejected": -4.299820423126221, "step": 7135 }, { "epoch": 0.82, "learning_rate": 5.4009130282102304e-08, "logits/chosen": -3.17832612991333, "logits/rejected": -3.4203057289123535, "logps/chosen": -206.61595153808594, "logps/rejected": -215.2950439453125, "loss": 0.7406, "rewards/accuracies": 0.5, "rewards/chosen": -0.9272035956382751, "rewards/margins": 0.761696994304657, "rewards/rejected": -1.6889004707336426, "step": 7136 }, { "epoch": 0.82, "learning_rate": 5.397401381247805e-08, "logits/chosen": -3.2560009956359863, "logits/rejected": -2.881807804107666, "logps/chosen": -235.8383331298828, "logps/rejected": -182.64089965820312, "loss": 1.0773, "rewards/accuracies": 0.5, "rewards/chosen": -0.7596648931503296, "rewards/margins": 0.6561343669891357, "rewards/rejected": -1.4157992601394653, "step": 7137 }, { "epoch": 0.82, "learning_rate": 5.39388973428538e-08, "logits/chosen": -3.241103172302246, "logits/rejected": -3.3165550231933594, "logps/chosen": -222.8415069580078, "logps/rejected": -182.9735107421875, "loss": 0.2551, "rewards/accuracies": 0.875, "rewards/chosen": -0.22179633378982544, "rewards/margins": 2.108006477355957, "rewards/rejected": -2.3298027515411377, "step": 7138 }, { "epoch": 0.82, "learning_rate": 5.3903780873229546e-08, "logits/chosen": -3.2768564224243164, "logits/rejected": -3.292141914367676, "logps/chosen": -245.9980926513672, "logps/rejected": -278.5570983886719, "loss": 0.395, "rewards/accuracies": 0.75, "rewards/chosen": -0.22541603446006775, "rewards/margins": 1.4013360738754272, "rewards/rejected": -1.6267521381378174, "step": 7139 }, { "epoch": 0.82, "learning_rate": 5.3868664403605286e-08, "logits/chosen": -3.258636474609375, "logits/rejected": -3.3108372688293457, "logps/chosen": -206.52711486816406, "logps/rejected": -167.8974609375, "loss": 0.645, "rewards/accuracies": 0.5, "rewards/chosen": -0.7569208145141602, "rewards/margins": 0.8343013525009155, "rewards/rejected": -1.5912221670150757, "step": 7140 }, { "epoch": 0.82, "learning_rate": 5.383354793398104e-08, "logits/chosen": -2.9101128578186035, "logits/rejected": -2.874324083328247, "logps/chosen": -193.00894165039062, "logps/rejected": -199.1323699951172, "loss": 0.3838, "rewards/accuracies": 0.875, "rewards/chosen": -0.3595592975616455, "rewards/margins": 1.5790296792984009, "rewards/rejected": -1.9385889768600464, "step": 7141 }, { "epoch": 0.82, "learning_rate": 5.379843146435678e-08, "logits/chosen": -3.735628843307495, "logits/rejected": -3.389333724975586, "logps/chosen": -258.93865966796875, "logps/rejected": -198.241943359375, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": 0.31665122509002686, "rewards/margins": 2.761840581893921, "rewards/rejected": -2.4451892375946045, "step": 7142 }, { "epoch": 0.82, "learning_rate": 5.376331499473253e-08, "logits/chosen": -3.662111759185791, "logits/rejected": -3.6177330017089844, "logps/chosen": -190.16529846191406, "logps/rejected": -195.35638427734375, "loss": 0.3526, "rewards/accuracies": 0.875, "rewards/chosen": 0.14735686779022217, "rewards/margins": 2.345254421234131, "rewards/rejected": -2.197897434234619, "step": 7143 }, { "epoch": 0.82, "learning_rate": 5.3728198525108275e-08, "logits/chosen": -3.3476250171661377, "logits/rejected": -3.492668628692627, "logps/chosen": -238.50482177734375, "logps/rejected": -223.25210571289062, "loss": 0.4713, "rewards/accuracies": 0.75, "rewards/chosen": -0.6927677989006042, "rewards/margins": 2.179865837097168, "rewards/rejected": -2.872633457183838, "step": 7144 }, { "epoch": 0.82, "learning_rate": 5.369308205548402e-08, "logits/chosen": -2.8849036693573, "logits/rejected": -2.997025728225708, "logps/chosen": -325.3931579589844, "logps/rejected": -387.0048522949219, "loss": 0.5015, "rewards/accuracies": 0.625, "rewards/chosen": -0.0015087127685546875, "rewards/margins": 1.183915376663208, "rewards/rejected": -1.1854240894317627, "step": 7145 }, { "epoch": 0.82, "learning_rate": 5.365796558585976e-08, "logits/chosen": -3.024273157119751, "logits/rejected": -3.23806095123291, "logps/chosen": -273.0790100097656, "logps/rejected": -349.974365234375, "loss": 0.433, "rewards/accuracies": 0.75, "rewards/chosen": -0.0017879605293273926, "rewards/margins": 1.4102909564971924, "rewards/rejected": -1.4120787382125854, "step": 7146 }, { "epoch": 0.82, "learning_rate": 5.362284911623551e-08, "logits/chosen": -3.270214796066284, "logits/rejected": -3.303323984146118, "logps/chosen": -261.51580810546875, "logps/rejected": -432.56707763671875, "loss": 0.657, "rewards/accuracies": 0.75, "rewards/chosen": -0.32239580154418945, "rewards/margins": 1.277784824371338, "rewards/rejected": -1.6001808643341064, "step": 7147 }, { "epoch": 0.82, "learning_rate": 5.358773264661126e-08, "logits/chosen": -3.13706111907959, "logits/rejected": -2.816362142562866, "logps/chosen": -238.332275390625, "logps/rejected": -276.6945495605469, "loss": 0.3635, "rewards/accuracies": 0.75, "rewards/chosen": -0.294575035572052, "rewards/margins": 2.0045483112335205, "rewards/rejected": -2.2991232872009277, "step": 7148 }, { "epoch": 0.82, "learning_rate": 5.3552616176987e-08, "logits/chosen": -3.4177966117858887, "logits/rejected": -3.2029786109924316, "logps/chosen": -432.92755126953125, "logps/rejected": -209.31185913085938, "loss": 0.4904, "rewards/accuracies": 0.625, "rewards/chosen": 0.1412757933139801, "rewards/margins": 1.3529679775238037, "rewards/rejected": -1.211692214012146, "step": 7149 }, { "epoch": 0.82, "learning_rate": 5.351749970736275e-08, "logits/chosen": -2.968412160873413, "logits/rejected": -2.9250307083129883, "logps/chosen": -280.2703857421875, "logps/rejected": -262.619140625, "loss": 0.3841, "rewards/accuracies": 0.875, "rewards/chosen": -0.2529021203517914, "rewards/margins": 1.0485327243804932, "rewards/rejected": -1.3014349937438965, "step": 7150 }, { "epoch": 0.82, "learning_rate": 5.348238323773849e-08, "logits/chosen": -3.407029151916504, "logits/rejected": -3.115996837615967, "logps/chosen": -270.0254821777344, "logps/rejected": -246.6224365234375, "loss": 0.5909, "rewards/accuracies": 0.375, "rewards/chosen": -0.4621444344520569, "rewards/margins": 1.432503342628479, "rewards/rejected": -1.8946478366851807, "step": 7151 }, { "epoch": 0.82, "learning_rate": 5.3447266768114247e-08, "logits/chosen": -3.179720401763916, "logits/rejected": -3.467926263809204, "logps/chosen": -296.02886962890625, "logps/rejected": -235.03262329101562, "loss": 0.5672, "rewards/accuracies": 0.75, "rewards/chosen": -0.5206654071807861, "rewards/margins": 0.8019474148750305, "rewards/rejected": -1.3226128816604614, "step": 7152 }, { "epoch": 0.82, "learning_rate": 5.341215029848999e-08, "logits/chosen": -3.497636079788208, "logits/rejected": -3.410590887069702, "logps/chosen": -336.58538818359375, "logps/rejected": -273.8204345703125, "loss": 0.1595, "rewards/accuracies": 1.0, "rewards/chosen": 0.8151699900627136, "rewards/margins": 2.796783208847046, "rewards/rejected": -1.9816131591796875, "step": 7153 }, { "epoch": 0.82, "learning_rate": 5.3377033828865734e-08, "logits/chosen": -3.283355712890625, "logits/rejected": -3.2371985912323, "logps/chosen": -172.03509521484375, "logps/rejected": -331.6355285644531, "loss": 0.5594, "rewards/accuracies": 0.625, "rewards/chosen": -0.44285818934440613, "rewards/margins": 2.647660970687866, "rewards/rejected": -3.0905191898345947, "step": 7154 }, { "epoch": 0.82, "learning_rate": 5.334191735924148e-08, "logits/chosen": -2.7365598678588867, "logits/rejected": -2.6112942695617676, "logps/chosen": -263.00067138671875, "logps/rejected": -293.7089538574219, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": 0.7234277725219727, "rewards/margins": 3.002239942550659, "rewards/rejected": -2.2788124084472656, "step": 7155 }, { "epoch": 0.82, "learning_rate": 5.330680088961723e-08, "logits/chosen": -2.6125688552856445, "logits/rejected": -2.529881000518799, "logps/chosen": -318.92181396484375, "logps/rejected": -364.12603759765625, "loss": 0.54, "rewards/accuracies": 0.75, "rewards/chosen": -0.7279793620109558, "rewards/margins": 2.256826400756836, "rewards/rejected": -2.9848058223724365, "step": 7156 }, { "epoch": 0.83, "learning_rate": 5.327168441999297e-08, "logits/chosen": -2.704072952270508, "logits/rejected": -2.6610424518585205, "logps/chosen": -332.5299072265625, "logps/rejected": -322.45233154296875, "loss": 0.1542, "rewards/accuracies": 1.0, "rewards/chosen": 0.6599267721176147, "rewards/margins": 3.0830421447753906, "rewards/rejected": -2.4231152534484863, "step": 7157 }, { "epoch": 0.83, "learning_rate": 5.323656795036872e-08, "logits/chosen": -3.3705644607543945, "logits/rejected": -3.3625173568725586, "logps/chosen": -317.4811096191406, "logps/rejected": -281.2325439453125, "loss": 0.1607, "rewards/accuracies": 1.0, "rewards/chosen": 0.12237464636564255, "rewards/margins": 3.722754716873169, "rewards/rejected": -3.600379705429077, "step": 7158 }, { "epoch": 0.83, "learning_rate": 5.3201451480744464e-08, "logits/chosen": -3.4120538234710693, "logits/rejected": -3.447821855545044, "logps/chosen": -124.3772964477539, "logps/rejected": -180.6776885986328, "loss": 1.1916, "rewards/accuracies": 0.5, "rewards/chosen": -1.5838055610656738, "rewards/margins": -0.13256797194480896, "rewards/rejected": -1.4512377977371216, "step": 7159 }, { "epoch": 0.83, "learning_rate": 5.316633501112021e-08, "logits/chosen": -2.476067543029785, "logits/rejected": -2.604236602783203, "logps/chosen": -394.2427978515625, "logps/rejected": -293.75384521484375, "loss": 0.5223, "rewards/accuracies": 0.75, "rewards/chosen": -0.14745891094207764, "rewards/margins": 1.5967051982879639, "rewards/rejected": -1.744164228439331, "step": 7160 }, { "epoch": 0.83, "learning_rate": 5.313121854149596e-08, "logits/chosen": -3.1099724769592285, "logits/rejected": -2.860887050628662, "logps/chosen": -227.5123291015625, "logps/rejected": -230.17047119140625, "loss": 0.4825, "rewards/accuracies": 0.625, "rewards/chosen": -0.334161639213562, "rewards/margins": 0.9114069938659668, "rewards/rejected": -1.2455687522888184, "step": 7161 }, { "epoch": 0.83, "learning_rate": 5.3096102071871706e-08, "logits/chosen": -3.5224366188049316, "logits/rejected": -3.5810699462890625, "logps/chosen": -286.3636169433594, "logps/rejected": -216.73960876464844, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/chosen": 0.48567676544189453, "rewards/margins": 3.5095911026000977, "rewards/rejected": -3.0239145755767822, "step": 7162 }, { "epoch": 0.83, "learning_rate": 5.3060985602247446e-08, "logits/chosen": -2.6303887367248535, "logits/rejected": -3.276106834411621, "logps/chosen": -136.97982788085938, "logps/rejected": -303.70794677734375, "loss": 0.2894, "rewards/accuracies": 0.875, "rewards/chosen": 0.1255500614643097, "rewards/margins": 2.690877676010132, "rewards/rejected": -2.5653276443481445, "step": 7163 }, { "epoch": 0.83, "learning_rate": 5.30258691326232e-08, "logits/chosen": -3.3790528774261475, "logits/rejected": -2.9400475025177, "logps/chosen": -362.17694091796875, "logps/rejected": -177.6874542236328, "loss": 0.3965, "rewards/accuracies": 0.875, "rewards/chosen": -0.3523489236831665, "rewards/margins": 1.061598539352417, "rewards/rejected": -1.413947343826294, "step": 7164 }, { "epoch": 0.83, "learning_rate": 5.299075266299894e-08, "logits/chosen": -3.048398017883301, "logits/rejected": -2.827425479888916, "logps/chosen": -411.25592041015625, "logps/rejected": -397.6034851074219, "loss": 0.1547, "rewards/accuracies": 1.0, "rewards/chosen": -0.15595278143882751, "rewards/margins": 2.9642677307128906, "rewards/rejected": -3.12022066116333, "step": 7165 }, { "epoch": 0.83, "learning_rate": 5.2955636193374695e-08, "logits/chosen": -2.9141664505004883, "logits/rejected": -3.1487598419189453, "logps/chosen": -269.5955505371094, "logps/rejected": -297.6686096191406, "loss": 0.3057, "rewards/accuracies": 0.875, "rewards/chosen": -0.19615979492664337, "rewards/margins": 2.1887223720550537, "rewards/rejected": -2.3848822116851807, "step": 7166 }, { "epoch": 0.83, "learning_rate": 5.2920519723750435e-08, "logits/chosen": -2.4134950637817383, "logits/rejected": -2.3975229263305664, "logps/chosen": -108.747314453125, "logps/rejected": -303.2008056640625, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.4618927240371704, "rewards/margins": 2.3741979598999023, "rewards/rejected": -2.8360908031463623, "step": 7167 }, { "epoch": 0.83, "learning_rate": 5.288540325412618e-08, "logits/chosen": -3.163064956665039, "logits/rejected": -3.0900492668151855, "logps/chosen": -155.19085693359375, "logps/rejected": -236.49017333984375, "loss": 0.7081, "rewards/accuracies": 0.625, "rewards/chosen": -0.1751229166984558, "rewards/margins": 0.9185384511947632, "rewards/rejected": -1.0936614274978638, "step": 7168 }, { "epoch": 0.83, "learning_rate": 5.285028678450193e-08, "logits/chosen": -2.591860771179199, "logits/rejected": -2.758780002593994, "logps/chosen": -124.29107666015625, "logps/rejected": -136.834228515625, "loss": 0.4042, "rewards/accuracies": 0.75, "rewards/chosen": -0.039963752031326294, "rewards/margins": 1.0059757232666016, "rewards/rejected": -1.0459394454956055, "step": 7169 }, { "epoch": 0.83, "learning_rate": 5.281517031487768e-08, "logits/chosen": -3.179471969604492, "logits/rejected": -3.239856243133545, "logps/chosen": -395.94378662109375, "logps/rejected": -269.49871826171875, "loss": 0.6818, "rewards/accuracies": 0.75, "rewards/chosen": -0.6943668127059937, "rewards/margins": 0.7898698449134827, "rewards/rejected": -1.484236717224121, "step": 7170 }, { "epoch": 0.83, "learning_rate": 5.278005384525342e-08, "logits/chosen": -3.3077614307403564, "logits/rejected": -3.3139548301696777, "logps/chosen": -271.8511657714844, "logps/rejected": -289.1751708984375, "loss": 0.4971, "rewards/accuracies": 0.75, "rewards/chosen": -0.11771325021982193, "rewards/margins": 2.3311009407043457, "rewards/rejected": -2.4488143920898438, "step": 7171 }, { "epoch": 0.83, "learning_rate": 5.274493737562917e-08, "logits/chosen": -3.085782051086426, "logits/rejected": -3.1233084201812744, "logps/chosen": -335.94659423828125, "logps/rejected": -324.3938903808594, "loss": 0.5618, "rewards/accuracies": 0.5, "rewards/chosen": 0.18316471576690674, "rewards/margins": 1.665304183959961, "rewards/rejected": -1.4821393489837646, "step": 7172 }, { "epoch": 0.83, "learning_rate": 5.270982090600491e-08, "logits/chosen": -3.1732585430145264, "logits/rejected": -2.8213343620300293, "logps/chosen": -429.6387634277344, "logps/rejected": -335.6704406738281, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": 0.23916402459144592, "rewards/margins": 2.4587173461914062, "rewards/rejected": -2.2195534706115723, "step": 7173 }, { "epoch": 0.83, "learning_rate": 5.267470443638066e-08, "logits/chosen": -3.4048473834991455, "logits/rejected": -3.248838186264038, "logps/chosen": -205.46798706054688, "logps/rejected": -153.02816772460938, "loss": 0.4758, "rewards/accuracies": 0.75, "rewards/chosen": -0.017721980810165405, "rewards/margins": 1.7634456157684326, "rewards/rejected": -1.78116774559021, "step": 7174 }, { "epoch": 0.83, "learning_rate": 5.2639587966756406e-08, "logits/chosen": -2.9116263389587402, "logits/rejected": -2.775219440460205, "logps/chosen": -221.50941467285156, "logps/rejected": -176.64328002929688, "loss": 0.4434, "rewards/accuracies": 0.75, "rewards/chosen": 0.5129404664039612, "rewards/margins": 1.1801316738128662, "rewards/rejected": -0.6671911478042603, "step": 7175 }, { "epoch": 0.83, "learning_rate": 5.2604471497132154e-08, "logits/chosen": -2.0995755195617676, "logits/rejected": -2.053757905960083, "logps/chosen": -175.2369842529297, "logps/rejected": -272.14349365234375, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": 0.14458192884922028, "rewards/margins": 2.9634451866149902, "rewards/rejected": -2.8188633918762207, "step": 7176 }, { "epoch": 0.83, "learning_rate": 5.2569355027507894e-08, "logits/chosen": -2.9591825008392334, "logits/rejected": -2.8129115104675293, "logps/chosen": -239.23223876953125, "logps/rejected": -276.94976806640625, "loss": 0.6112, "rewards/accuracies": 0.625, "rewards/chosen": -0.25859880447387695, "rewards/margins": 2.1577720642089844, "rewards/rejected": -2.4163706302642822, "step": 7177 }, { "epoch": 0.83, "learning_rate": 5.253423855788365e-08, "logits/chosen": -3.009730339050293, "logits/rejected": -2.8602051734924316, "logps/chosen": -230.31021118164062, "logps/rejected": -318.58575439453125, "loss": 0.3124, "rewards/accuracies": 0.875, "rewards/chosen": -0.4576041102409363, "rewards/margins": 2.6772921085357666, "rewards/rejected": -3.1348962783813477, "step": 7178 }, { "epoch": 0.83, "learning_rate": 5.249912208825939e-08, "logits/chosen": -3.850715160369873, "logits/rejected": -3.1356279850006104, "logps/chosen": -274.42120361328125, "logps/rejected": -186.80441284179688, "loss": 0.4682, "rewards/accuracies": 0.875, "rewards/chosen": 0.011641651391983032, "rewards/margins": 1.5573867559432983, "rewards/rejected": -1.5457451343536377, "step": 7179 }, { "epoch": 0.83, "learning_rate": 5.246400561863514e-08, "logits/chosen": -3.125565767288208, "logits/rejected": -3.3291115760803223, "logps/chosen": -345.64190673828125, "logps/rejected": -352.2969665527344, "loss": 0.6559, "rewards/accuracies": 0.75, "rewards/chosen": -0.8319402933120728, "rewards/margins": 0.9330559372901917, "rewards/rejected": -1.7649961709976196, "step": 7180 }, { "epoch": 0.83, "learning_rate": 5.242888914901088e-08, "logits/chosen": -3.443859338760376, "logits/rejected": -3.7568769454956055, "logps/chosen": -191.58399963378906, "logps/rejected": -230.5093994140625, "loss": 0.1438, "rewards/accuracies": 0.875, "rewards/chosen": -0.1696305274963379, "rewards/margins": 2.773963451385498, "rewards/rejected": -2.943593740463257, "step": 7181 }, { "epoch": 0.83, "learning_rate": 5.239377267938663e-08, "logits/chosen": -3.2939257621765137, "logits/rejected": -3.2996158599853516, "logps/chosen": -104.30058288574219, "logps/rejected": -200.30947875976562, "loss": 0.3947, "rewards/accuracies": 0.875, "rewards/chosen": 0.4234180450439453, "rewards/margins": 1.401839256286621, "rewards/rejected": -0.9784212708473206, "step": 7182 }, { "epoch": 0.83, "learning_rate": 5.235865620976238e-08, "logits/chosen": -3.3709359169006348, "logits/rejected": -3.409945487976074, "logps/chosen": -182.3096923828125, "logps/rejected": -256.1904296875, "loss": 0.3457, "rewards/accuracies": 0.75, "rewards/chosen": 0.14487504959106445, "rewards/margins": 2.0211057662963867, "rewards/rejected": -1.8762307167053223, "step": 7183 }, { "epoch": 0.83, "learning_rate": 5.2323539740138125e-08, "logits/chosen": -3.5443286895751953, "logits/rejected": -3.599041700363159, "logps/chosen": -198.54855346679688, "logps/rejected": -210.27264404296875, "loss": 0.2486, "rewards/accuracies": 1.0, "rewards/chosen": 0.1781499981880188, "rewards/margins": 1.9933366775512695, "rewards/rejected": -1.8151865005493164, "step": 7184 }, { "epoch": 0.83, "learning_rate": 5.2288423270513866e-08, "logits/chosen": -3.443084239959717, "logits/rejected": -3.182363986968994, "logps/chosen": -316.8511657714844, "logps/rejected": -204.3426513671875, "loss": 0.3839, "rewards/accuracies": 0.625, "rewards/chosen": 0.15805649757385254, "rewards/margins": 1.601384162902832, "rewards/rejected": -1.44332754611969, "step": 7185 }, { "epoch": 0.83, "learning_rate": 5.225330680088962e-08, "logits/chosen": -3.361208200454712, "logits/rejected": -3.5827627182006836, "logps/chosen": -175.9075927734375, "logps/rejected": -272.21466064453125, "loss": 0.4503, "rewards/accuracies": 0.75, "rewards/chosen": 0.2877669930458069, "rewards/margins": 2.4001147747039795, "rewards/rejected": -2.1123478412628174, "step": 7186 }, { "epoch": 0.83, "learning_rate": 5.221819033126536e-08, "logits/chosen": -2.6030163764953613, "logits/rejected": -2.63392972946167, "logps/chosen": -176.7958526611328, "logps/rejected": -189.87899780273438, "loss": 0.4649, "rewards/accuracies": 0.75, "rewards/chosen": 0.07603834569454193, "rewards/margins": 1.140049695968628, "rewards/rejected": -1.0640113353729248, "step": 7187 }, { "epoch": 0.83, "learning_rate": 5.2183073861641114e-08, "logits/chosen": -3.3674750328063965, "logits/rejected": -2.9322073459625244, "logps/chosen": -392.8912353515625, "logps/rejected": -294.6309814453125, "loss": 0.5877, "rewards/accuracies": 0.625, "rewards/chosen": -0.41470810770988464, "rewards/margins": 0.790001630783081, "rewards/rejected": -1.2047096490859985, "step": 7188 }, { "epoch": 0.83, "learning_rate": 5.2147957392016855e-08, "logits/chosen": -3.626079559326172, "logits/rejected": -3.2735772132873535, "logps/chosen": -221.61485290527344, "logps/rejected": -254.3193817138672, "loss": 0.4348, "rewards/accuracies": 0.875, "rewards/chosen": 0.03566461801528931, "rewards/margins": 0.9663242697715759, "rewards/rejected": -0.9306596517562866, "step": 7189 }, { "epoch": 0.83, "learning_rate": 5.21128409223926e-08, "logits/chosen": -3.012845516204834, "logits/rejected": -2.9319541454315186, "logps/chosen": -218.39321899414062, "logps/rejected": -237.50379943847656, "loss": 0.2191, "rewards/accuracies": 1.0, "rewards/chosen": 0.2104346752166748, "rewards/margins": 2.376249074935913, "rewards/rejected": -2.1658146381378174, "step": 7190 }, { "epoch": 0.83, "learning_rate": 5.207772445276834e-08, "logits/chosen": -2.4714152812957764, "logits/rejected": -2.6587796211242676, "logps/chosen": -303.0846252441406, "logps/rejected": -265.298095703125, "loss": 0.619, "rewards/accuracies": 0.625, "rewards/chosen": -0.0017559044063091278, "rewards/margins": 1.2833112478256226, "rewards/rejected": -1.285067081451416, "step": 7191 }, { "epoch": 0.83, "learning_rate": 5.2042607983144096e-08, "logits/chosen": -3.123079776763916, "logits/rejected": -2.7818398475646973, "logps/chosen": -325.539306640625, "logps/rejected": -290.9383544921875, "loss": 0.4078, "rewards/accuracies": 0.875, "rewards/chosen": -0.14213469624519348, "rewards/margins": 2.0117945671081543, "rewards/rejected": -2.1539292335510254, "step": 7192 }, { "epoch": 0.83, "learning_rate": 5.200749151351984e-08, "logits/chosen": -3.0964229106903076, "logits/rejected": -3.21736478805542, "logps/chosen": -156.13198852539062, "logps/rejected": -273.4672546386719, "loss": 0.1618, "rewards/accuracies": 1.0, "rewards/chosen": 0.23286586999893188, "rewards/margins": 3.0485332012176514, "rewards/rejected": -2.815667152404785, "step": 7193 }, { "epoch": 0.83, "learning_rate": 5.197237504389559e-08, "logits/chosen": -3.611042022705078, "logits/rejected": -3.4054136276245117, "logps/chosen": -316.1274108886719, "logps/rejected": -222.89141845703125, "loss": 0.3795, "rewards/accuracies": 0.75, "rewards/chosen": 0.2574393153190613, "rewards/margins": 2.368043899536133, "rewards/rejected": -2.1106045246124268, "step": 7194 }, { "epoch": 0.83, "learning_rate": 5.193725857427133e-08, "logits/chosen": -3.6205430030822754, "logits/rejected": -3.3656296730041504, "logps/chosen": -277.5799865722656, "logps/rejected": -245.41415405273438, "loss": 0.4914, "rewards/accuracies": 0.625, "rewards/chosen": -1.0701688528060913, "rewards/margins": 0.8755013942718506, "rewards/rejected": -1.945670247077942, "step": 7195 }, { "epoch": 0.83, "learning_rate": 5.190214210464707e-08, "logits/chosen": -3.636477470397949, "logits/rejected": -3.79390549659729, "logps/chosen": -334.841552734375, "logps/rejected": -255.5904998779297, "loss": 0.2382, "rewards/accuracies": 1.0, "rewards/chosen": 0.10335685312747955, "rewards/margins": 2.2148020267486572, "rewards/rejected": -2.111445426940918, "step": 7196 }, { "epoch": 0.83, "learning_rate": 5.1867025635022826e-08, "logits/chosen": -3.139993190765381, "logits/rejected": -3.1623871326446533, "logps/chosen": -91.03340148925781, "logps/rejected": -146.93853759765625, "loss": 0.4306, "rewards/accuracies": 1.0, "rewards/chosen": -0.0757780373096466, "rewards/margins": 1.5053956508636475, "rewards/rejected": -1.5811737775802612, "step": 7197 }, { "epoch": 0.83, "learning_rate": 5.1831909165398566e-08, "logits/chosen": -3.236751079559326, "logits/rejected": -2.9461381435394287, "logps/chosen": -220.8119659423828, "logps/rejected": -329.17755126953125, "loss": 0.2509, "rewards/accuracies": 0.875, "rewards/chosen": 0.01634964346885681, "rewards/margins": 2.453500509262085, "rewards/rejected": -2.4371509552001953, "step": 7198 }, { "epoch": 0.83, "learning_rate": 5.1796792695774314e-08, "logits/chosen": -2.899993658065796, "logits/rejected": -3.0135269165039062, "logps/chosen": -279.3945007324219, "logps/rejected": -219.54110717773438, "loss": 0.2633, "rewards/accuracies": 0.875, "rewards/chosen": 0.25677812099456787, "rewards/margins": 2.150317430496216, "rewards/rejected": -1.8935391902923584, "step": 7199 }, { "epoch": 0.83, "learning_rate": 5.176167622615006e-08, "logits/chosen": -3.0503158569335938, "logits/rejected": -2.8781580924987793, "logps/chosen": -190.38742065429688, "logps/rejected": -215.14903259277344, "loss": 0.2156, "rewards/accuracies": 1.0, "rewards/chosen": -0.24159660935401917, "rewards/margins": 2.0767951011657715, "rewards/rejected": -2.318391799926758, "step": 7200 }, { "epoch": 0.83, "learning_rate": 5.172655975652581e-08, "logits/chosen": -2.822378158569336, "logits/rejected": -2.9218201637268066, "logps/chosen": -293.5276794433594, "logps/rejected": -226.42941284179688, "loss": 0.2741, "rewards/accuracies": 1.0, "rewards/chosen": 0.048987261950969696, "rewards/margins": 1.9472087621688843, "rewards/rejected": -1.898221492767334, "step": 7201 }, { "epoch": 0.83, "learning_rate": 5.169144328690155e-08, "logits/chosen": -3.353611469268799, "logits/rejected": -3.1339752674102783, "logps/chosen": -135.82785034179688, "logps/rejected": -135.29037475585938, "loss": 0.3598, "rewards/accuracies": 0.875, "rewards/chosen": 0.1117946207523346, "rewards/margins": 1.3920656442642212, "rewards/rejected": -1.280271053314209, "step": 7202 }, { "epoch": 0.83, "learning_rate": 5.16563268172773e-08, "logits/chosen": -2.768587112426758, "logits/rejected": -3.0902342796325684, "logps/chosen": -141.28305053710938, "logps/rejected": -317.7546081542969, "loss": 0.3637, "rewards/accuracies": 0.75, "rewards/chosen": -0.047416456043720245, "rewards/margins": 1.8709005117416382, "rewards/rejected": -1.9183168411254883, "step": 7203 }, { "epoch": 0.83, "learning_rate": 5.162121034765304e-08, "logits/chosen": -3.4882473945617676, "logits/rejected": -3.610447883605957, "logps/chosen": -129.71070861816406, "logps/rejected": -211.37588500976562, "loss": 0.4796, "rewards/accuracies": 0.625, "rewards/chosen": -0.7566113471984863, "rewards/margins": 1.654716968536377, "rewards/rejected": -2.4113283157348633, "step": 7204 }, { "epoch": 0.83, "learning_rate": 5.15860938780288e-08, "logits/chosen": -3.099691867828369, "logits/rejected": -3.1153671741485596, "logps/chosen": -327.21533203125, "logps/rejected": -210.59390258789062, "loss": 0.35, "rewards/accuracies": 0.875, "rewards/chosen": 0.14276161789894104, "rewards/margins": 1.8347703218460083, "rewards/rejected": -1.6920088529586792, "step": 7205 }, { "epoch": 0.83, "learning_rate": 5.155097740840454e-08, "logits/chosen": -3.4056687355041504, "logits/rejected": -3.2464683055877686, "logps/chosen": -217.2108917236328, "logps/rejected": -291.6184387207031, "loss": 0.3716, "rewards/accuracies": 1.0, "rewards/chosen": -0.5433745384216309, "rewards/margins": 1.016566514968872, "rewards/rejected": -1.5599409341812134, "step": 7206 }, { "epoch": 0.83, "learning_rate": 5.1515860938780285e-08, "logits/chosen": -2.0250158309936523, "logits/rejected": -2.2212419509887695, "logps/chosen": -479.78106689453125, "logps/rejected": -411.8062744140625, "loss": 0.6131, "rewards/accuracies": 0.75, "rewards/chosen": -0.1372247189283371, "rewards/margins": 1.1852023601531982, "rewards/rejected": -1.3224271535873413, "step": 7207 }, { "epoch": 0.83, "learning_rate": 5.1480744469156026e-08, "logits/chosen": -2.562403440475464, "logits/rejected": -2.5418102741241455, "logps/chosen": -349.981201171875, "logps/rejected": -278.78753662109375, "loss": 0.2866, "rewards/accuracies": 0.875, "rewards/chosen": -0.3121623992919922, "rewards/margins": 2.5945208072662354, "rewards/rejected": -2.9066832065582275, "step": 7208 }, { "epoch": 0.83, "learning_rate": 5.144562799953178e-08, "logits/chosen": -2.324127197265625, "logits/rejected": -2.371750593185425, "logps/chosen": -321.9378662109375, "logps/rejected": -198.25218200683594, "loss": 0.1623, "rewards/accuracies": 1.0, "rewards/chosen": 0.36530056595802307, "rewards/margins": 2.1439261436462402, "rewards/rejected": -1.778625726699829, "step": 7209 }, { "epoch": 0.83, "learning_rate": 5.141051152990752e-08, "logits/chosen": -3.348931074142456, "logits/rejected": -3.3629398345947266, "logps/chosen": -217.61553955078125, "logps/rejected": -209.7340087890625, "loss": 0.5515, "rewards/accuracies": 0.75, "rewards/chosen": -0.35177645087242126, "rewards/margins": 0.9011094570159912, "rewards/rejected": -1.2528859376907349, "step": 7210 }, { "epoch": 0.83, "learning_rate": 5.1375395060283274e-08, "logits/chosen": -3.1566390991210938, "logits/rejected": -3.124048948287964, "logps/chosen": -192.31640625, "logps/rejected": -183.98548889160156, "loss": 0.2128, "rewards/accuracies": 1.0, "rewards/chosen": 0.726064145565033, "rewards/margins": 2.169632911682129, "rewards/rejected": -1.4435688257217407, "step": 7211 }, { "epoch": 0.83, "learning_rate": 5.1340278590659015e-08, "logits/chosen": -2.5206034183502197, "logits/rejected": -2.8013477325439453, "logps/chosen": -390.63671875, "logps/rejected": -302.86993408203125, "loss": 0.2822, "rewards/accuracies": 0.875, "rewards/chosen": 0.0093747079372406, "rewards/margins": 2.381688356399536, "rewards/rejected": -2.3723137378692627, "step": 7212 }, { "epoch": 0.83, "learning_rate": 5.130516212103476e-08, "logits/chosen": -2.8184759616851807, "logits/rejected": -2.9844157695770264, "logps/chosen": -229.88758850097656, "logps/rejected": -334.8213806152344, "loss": 0.6267, "rewards/accuracies": 0.5, "rewards/chosen": -0.20644442737102509, "rewards/margins": 1.112413763999939, "rewards/rejected": -1.3188581466674805, "step": 7213 }, { "epoch": 0.83, "learning_rate": 5.127004565141051e-08, "logits/chosen": -3.7543506622314453, "logits/rejected": -3.337465286254883, "logps/chosen": -441.5437316894531, "logps/rejected": -210.23257446289062, "loss": 0.2485, "rewards/accuracies": 1.0, "rewards/chosen": 0.004851162433624268, "rewards/margins": 2.1254005432128906, "rewards/rejected": -2.120549440383911, "step": 7214 }, { "epoch": 0.83, "learning_rate": 5.1234929181786256e-08, "logits/chosen": -2.5159685611724854, "logits/rejected": -2.5104219913482666, "logps/chosen": -213.26303100585938, "logps/rejected": -306.47900390625, "loss": 0.4134, "rewards/accuracies": 0.875, "rewards/chosen": -0.3495781421661377, "rewards/margins": 2.225933074951172, "rewards/rejected": -2.5755109786987305, "step": 7215 }, { "epoch": 0.83, "learning_rate": 5.1199812712162e-08, "logits/chosen": -2.96331787109375, "logits/rejected": -2.8253655433654785, "logps/chosen": -318.27252197265625, "logps/rejected": -357.6860046386719, "loss": 0.1702, "rewards/accuracies": 1.0, "rewards/chosen": 0.12684068083763123, "rewards/margins": 3.799116611480713, "rewards/rejected": -3.672276020050049, "step": 7216 }, { "epoch": 0.83, "learning_rate": 5.116469624253775e-08, "logits/chosen": -3.1732683181762695, "logits/rejected": -3.14555025100708, "logps/chosen": -294.1593017578125, "logps/rejected": -205.81202697753906, "loss": 0.5041, "rewards/accuracies": 0.75, "rewards/chosen": 0.18109780550003052, "rewards/margins": 1.5081470012664795, "rewards/rejected": -1.3270492553710938, "step": 7217 }, { "epoch": 0.83, "learning_rate": 5.112957977291349e-08, "logits/chosen": -2.6620919704437256, "logits/rejected": -2.485532283782959, "logps/chosen": -229.7654266357422, "logps/rejected": -450.20745849609375, "loss": 0.6689, "rewards/accuracies": 0.75, "rewards/chosen": -0.5382860898971558, "rewards/margins": 2.340419292449951, "rewards/rejected": -2.8787052631378174, "step": 7218 }, { "epoch": 0.83, "learning_rate": 5.1094463303289245e-08, "logits/chosen": -2.8578553199768066, "logits/rejected": -2.8441576957702637, "logps/chosen": -167.7171173095703, "logps/rejected": -595.407470703125, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -0.3701966404914856, "rewards/margins": 1.8665001392364502, "rewards/rejected": -2.236696720123291, "step": 7219 }, { "epoch": 0.83, "learning_rate": 5.1059346833664986e-08, "logits/chosen": -2.906580924987793, "logits/rejected": -2.7348132133483887, "logps/chosen": -194.05923461914062, "logps/rejected": -294.0055847167969, "loss": 0.3101, "rewards/accuracies": 0.875, "rewards/chosen": -0.36083829402923584, "rewards/margins": 1.8688404560089111, "rewards/rejected": -2.2296786308288574, "step": 7220 }, { "epoch": 0.83, "learning_rate": 5.102423036404073e-08, "logits/chosen": -3.084887981414795, "logits/rejected": -3.2139639854431152, "logps/chosen": -369.2535400390625, "logps/rejected": -277.60015869140625, "loss": 0.4728, "rewards/accuracies": 0.75, "rewards/chosen": -0.2462189793586731, "rewards/margins": 1.4211320877075195, "rewards/rejected": -1.6673511266708374, "step": 7221 }, { "epoch": 0.83, "learning_rate": 5.098911389441648e-08, "logits/chosen": -3.1356067657470703, "logits/rejected": -3.196457624435425, "logps/chosen": -239.6969451904297, "logps/rejected": -234.3162841796875, "loss": 0.3056, "rewards/accuracies": 0.875, "rewards/chosen": 0.5528683662414551, "rewards/margins": 1.5487558841705322, "rewards/rejected": -0.9958874583244324, "step": 7222 }, { "epoch": 0.83, "learning_rate": 5.095399742479223e-08, "logits/chosen": -2.8404672145843506, "logits/rejected": -2.8910112380981445, "logps/chosen": -278.107666015625, "logps/rejected": -287.7952880859375, "loss": 0.2924, "rewards/accuracies": 0.875, "rewards/chosen": -0.3784114122390747, "rewards/margins": 2.1610982418060303, "rewards/rejected": -2.5395095348358154, "step": 7223 }, { "epoch": 0.83, "learning_rate": 5.091888095516797e-08, "logits/chosen": -2.432145833969116, "logits/rejected": -2.7687366008758545, "logps/chosen": -316.23309326171875, "logps/rejected": -257.31146240234375, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": -0.4711979627609253, "rewards/margins": 0.6853963136672974, "rewards/rejected": -1.1565942764282227, "step": 7224 }, { "epoch": 0.83, "learning_rate": 5.088376448554372e-08, "logits/chosen": -3.3834495544433594, "logits/rejected": -3.5673608779907227, "logps/chosen": -198.2320556640625, "logps/rejected": -154.30911254882812, "loss": 0.3054, "rewards/accuracies": 0.875, "rewards/chosen": -0.3147566616535187, "rewards/margins": 1.768181324005127, "rewards/rejected": -2.0829379558563232, "step": 7225 }, { "epoch": 0.83, "learning_rate": 5.084864801591946e-08, "logits/chosen": -2.768056631088257, "logits/rejected": -2.564816474914551, "logps/chosen": -252.56637573242188, "logps/rejected": -197.78660583496094, "loss": 0.3662, "rewards/accuracies": 0.875, "rewards/chosen": 0.06370498239994049, "rewards/margins": 1.0651905536651611, "rewards/rejected": -1.0014855861663818, "step": 7226 }, { "epoch": 0.83, "learning_rate": 5.081353154629521e-08, "logits/chosen": -2.748734474182129, "logits/rejected": -2.8341104984283447, "logps/chosen": -226.97833251953125, "logps/rejected": -319.068359375, "loss": 0.3362, "rewards/accuracies": 0.875, "rewards/chosen": -0.33532992005348206, "rewards/margins": 2.2578370571136475, "rewards/rejected": -2.5931670665740967, "step": 7227 }, { "epoch": 0.83, "learning_rate": 5.077841507667096e-08, "logits/chosen": -3.666344165802002, "logits/rejected": -3.667032241821289, "logps/chosen": -193.7274169921875, "logps/rejected": -278.1099548339844, "loss": 0.6655, "rewards/accuracies": 0.75, "rewards/chosen": -1.2482751607894897, "rewards/margins": 0.45893168449401855, "rewards/rejected": -1.7072067260742188, "step": 7228 }, { "epoch": 0.83, "learning_rate": 5.0743298607046704e-08, "logits/chosen": -3.3546183109283447, "logits/rejected": -3.21582293510437, "logps/chosen": -212.4844207763672, "logps/rejected": -282.94671630859375, "loss": 0.3959, "rewards/accuracies": 0.875, "rewards/chosen": -0.5599007606506348, "rewards/margins": 1.969846487045288, "rewards/rejected": -2.529747247695923, "step": 7229 }, { "epoch": 0.83, "learning_rate": 5.0708182137422445e-08, "logits/chosen": -2.17800235748291, "logits/rejected": -2.0206480026245117, "logps/chosen": -295.41522216796875, "logps/rejected": -363.5266418457031, "loss": 0.2518, "rewards/accuracies": 0.875, "rewards/chosen": 0.036778099834918976, "rewards/margins": 2.0522620677948, "rewards/rejected": -2.015484094619751, "step": 7230 }, { "epoch": 0.83, "learning_rate": 5.06730656677982e-08, "logits/chosen": -2.4390439987182617, "logits/rejected": -2.4294915199279785, "logps/chosen": -263.84326171875, "logps/rejected": -312.64697265625, "loss": 0.1233, "rewards/accuracies": 1.0, "rewards/chosen": 0.5868321061134338, "rewards/margins": 3.057685136795044, "rewards/rejected": -2.470852851867676, "step": 7231 }, { "epoch": 0.83, "learning_rate": 5.063794919817394e-08, "logits/chosen": -3.682229995727539, "logits/rejected": -3.980869770050049, "logps/chosen": -135.2556915283203, "logps/rejected": -179.06053161621094, "loss": 0.5635, "rewards/accuracies": 0.875, "rewards/chosen": -0.08606965839862823, "rewards/margins": 0.59857177734375, "rewards/rejected": -0.6846414804458618, "step": 7232 }, { "epoch": 0.83, "learning_rate": 5.0602832728549693e-08, "logits/chosen": -3.0537166595458984, "logits/rejected": -2.945998191833496, "logps/chosen": -248.93743896484375, "logps/rejected": -200.779296875, "loss": 0.3459, "rewards/accuracies": 0.875, "rewards/chosen": 0.062008775770664215, "rewards/margins": 2.0428574085235596, "rewards/rejected": -1.9808486700057983, "step": 7233 }, { "epoch": 0.83, "learning_rate": 5.0567716258925434e-08, "logits/chosen": -3.6369733810424805, "logits/rejected": -3.5469794273376465, "logps/chosen": -301.2701721191406, "logps/rejected": -249.73939514160156, "loss": 0.2107, "rewards/accuracies": 0.875, "rewards/chosen": 0.32317203283309937, "rewards/margins": 3.754246711730957, "rewards/rejected": -3.431074619293213, "step": 7234 }, { "epoch": 0.83, "learning_rate": 5.053259978930118e-08, "logits/chosen": -2.945077657699585, "logits/rejected": -2.824096441268921, "logps/chosen": -293.1856689453125, "logps/rejected": -293.32952880859375, "loss": 0.1586, "rewards/accuracies": 0.875, "rewards/chosen": 0.22017619013786316, "rewards/margins": 3.155290365219116, "rewards/rejected": -2.9351143836975098, "step": 7235 }, { "epoch": 0.83, "learning_rate": 5.049748331967693e-08, "logits/chosen": -3.467777967453003, "logits/rejected": -2.99243426322937, "logps/chosen": -391.2261962890625, "logps/rejected": -267.9790344238281, "loss": 0.3607, "rewards/accuracies": 0.75, "rewards/chosen": 0.334145188331604, "rewards/margins": 2.3822762966156006, "rewards/rejected": -2.048130989074707, "step": 7236 }, { "epoch": 0.83, "learning_rate": 5.0462366850052676e-08, "logits/chosen": -2.9586586952209473, "logits/rejected": -2.5912508964538574, "logps/chosen": -357.03900146484375, "logps/rejected": -286.2179260253906, "loss": 0.1497, "rewards/accuracies": 1.0, "rewards/chosen": 0.5359609723091125, "rewards/margins": 2.467419385910034, "rewards/rejected": -1.9314583539962769, "step": 7237 }, { "epoch": 0.83, "learning_rate": 5.0427250380428416e-08, "logits/chosen": -3.7321577072143555, "logits/rejected": -3.539353132247925, "logps/chosen": -205.33250427246094, "logps/rejected": -131.23487854003906, "loss": 0.4874, "rewards/accuracies": 0.625, "rewards/chosen": -0.20737551152706146, "rewards/margins": 1.2731231451034546, "rewards/rejected": -1.4804986715316772, "step": 7238 }, { "epoch": 0.83, "learning_rate": 5.039213391080417e-08, "logits/chosen": -3.347930431365967, "logits/rejected": -3.473954916000366, "logps/chosen": -258.64031982421875, "logps/rejected": -271.65936279296875, "loss": 0.3053, "rewards/accuracies": 0.875, "rewards/chosen": 0.21302083134651184, "rewards/margins": 2.312222719192505, "rewards/rejected": -2.0992019176483154, "step": 7239 }, { "epoch": 0.83, "learning_rate": 5.035701744117991e-08, "logits/chosen": -3.5192980766296387, "logits/rejected": -3.468867778778076, "logps/chosen": -249.80577087402344, "logps/rejected": -132.6614227294922, "loss": 0.3805, "rewards/accuracies": 0.75, "rewards/chosen": 0.42423000931739807, "rewards/margins": 1.6800646781921387, "rewards/rejected": -1.255834698677063, "step": 7240 }, { "epoch": 0.83, "learning_rate": 5.0321900971555665e-08, "logits/chosen": -3.075322389602661, "logits/rejected": -3.3324501514434814, "logps/chosen": -236.03492736816406, "logps/rejected": -216.5542449951172, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": 0.21005485951900482, "rewards/margins": 2.3397347927093506, "rewards/rejected": -2.1296796798706055, "step": 7241 }, { "epoch": 0.83, "learning_rate": 5.0286784501931405e-08, "logits/chosen": -2.9063029289245605, "logits/rejected": -3.1994709968566895, "logps/chosen": -96.35302734375, "logps/rejected": -187.04896545410156, "loss": 0.3163, "rewards/accuracies": 1.0, "rewards/chosen": 0.15326303243637085, "rewards/margins": 1.758339285850525, "rewards/rejected": -1.6050763130187988, "step": 7242 }, { "epoch": 0.83, "learning_rate": 5.0251668032307146e-08, "logits/chosen": -2.896296977996826, "logits/rejected": -3.020688056945801, "logps/chosen": -148.65745544433594, "logps/rejected": -245.28164672851562, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 0.014453485608100891, "rewards/margins": 3.4737002849578857, "rewards/rejected": -3.4592466354370117, "step": 7243 }, { "epoch": 0.84, "learning_rate": 5.021655156268289e-08, "logits/chosen": -3.448168992996216, "logits/rejected": -3.0071046352386475, "logps/chosen": -219.86044311523438, "logps/rejected": -175.07923889160156, "loss": 0.3534, "rewards/accuracies": 0.625, "rewards/chosen": -0.2569941580295563, "rewards/margins": 2.499329090118408, "rewards/rejected": -2.7563230991363525, "step": 7244 }, { "epoch": 0.84, "learning_rate": 5.018143509305864e-08, "logits/chosen": -3.0672767162323, "logits/rejected": -2.4708375930786133, "logps/chosen": -203.8226776123047, "logps/rejected": -227.91311645507812, "loss": 0.2793, "rewards/accuracies": 1.0, "rewards/chosen": -0.09652988612651825, "rewards/margins": 1.445613145828247, "rewards/rejected": -1.5421431064605713, "step": 7245 }, { "epoch": 0.84, "learning_rate": 5.014631862343439e-08, "logits/chosen": -3.2285068035125732, "logits/rejected": -3.23307466506958, "logps/chosen": -132.4745330810547, "logps/rejected": -157.16644287109375, "loss": 0.4672, "rewards/accuracies": 0.625, "rewards/chosen": 0.09372683614492416, "rewards/margins": 1.4251713752746582, "rewards/rejected": -1.331444501876831, "step": 7246 }, { "epoch": 0.84, "learning_rate": 5.011120215381013e-08, "logits/chosen": -2.9865612983703613, "logits/rejected": -2.824361801147461, "logps/chosen": -438.48919677734375, "logps/rejected": -387.7099609375, "loss": 0.1639, "rewards/accuracies": 1.0, "rewards/chosen": 0.14505872130393982, "rewards/margins": 2.6284122467041016, "rewards/rejected": -2.48335337638855, "step": 7247 }, { "epoch": 0.84, "learning_rate": 5.007608568418588e-08, "logits/chosen": -3.425402879714966, "logits/rejected": -3.5531206130981445, "logps/chosen": -213.7608642578125, "logps/rejected": -115.87738037109375, "loss": 0.2962, "rewards/accuracies": 0.875, "rewards/chosen": 0.21418142318725586, "rewards/margins": 1.8235903978347778, "rewards/rejected": -1.609408974647522, "step": 7248 }, { "epoch": 0.84, "learning_rate": 5.004096921456162e-08, "logits/chosen": -2.8044662475585938, "logits/rejected": -2.8064823150634766, "logps/chosen": -364.1567687988281, "logps/rejected": -322.76470947265625, "loss": 0.7284, "rewards/accuracies": 0.625, "rewards/chosen": -1.3767805099487305, "rewards/margins": 0.5106115341186523, "rewards/rejected": -1.8873920440673828, "step": 7249 }, { "epoch": 0.84, "learning_rate": 5.0005852744937377e-08, "logits/chosen": -2.4771475791931152, "logits/rejected": -2.402222156524658, "logps/chosen": -218.1021728515625, "logps/rejected": -255.9054412841797, "loss": 0.428, "rewards/accuracies": 0.75, "rewards/chosen": -0.2969256639480591, "rewards/margins": 0.9961888790130615, "rewards/rejected": -1.2931146621704102, "step": 7250 }, { "epoch": 0.84, "learning_rate": 4.997073627531312e-08, "logits/chosen": -3.127955675125122, "logits/rejected": -3.6149754524230957, "logps/chosen": -238.84793090820312, "logps/rejected": -196.8673858642578, "loss": 0.3447, "rewards/accuracies": 0.875, "rewards/chosen": -0.49891865253448486, "rewards/margins": 1.5283573865890503, "rewards/rejected": -2.027275800704956, "step": 7251 }, { "epoch": 0.84, "learning_rate": 4.9935619805688864e-08, "logits/chosen": -3.916311264038086, "logits/rejected": -4.003241062164307, "logps/chosen": -270.83734130859375, "logps/rejected": -248.2839813232422, "loss": 0.3046, "rewards/accuracies": 0.875, "rewards/chosen": -0.5412649512290955, "rewards/margins": 1.9029667377471924, "rewards/rejected": -2.4442317485809326, "step": 7252 }, { "epoch": 0.84, "learning_rate": 4.990050333606461e-08, "logits/chosen": -2.9026429653167725, "logits/rejected": -3.0897152423858643, "logps/chosen": -339.25823974609375, "logps/rejected": -422.83331298828125, "loss": 0.3412, "rewards/accuracies": 0.75, "rewards/chosen": 0.0689205527305603, "rewards/margins": 2.0260913372039795, "rewards/rejected": -1.9571707248687744, "step": 7253 }, { "epoch": 0.84, "learning_rate": 4.986538686644036e-08, "logits/chosen": -3.273560047149658, "logits/rejected": -3.186000347137451, "logps/chosen": -232.13180541992188, "logps/rejected": -264.48175048828125, "loss": 0.5452, "rewards/accuracies": 0.625, "rewards/chosen": -0.791731059551239, "rewards/margins": 2.1436400413513184, "rewards/rejected": -2.935370922088623, "step": 7254 }, { "epoch": 0.84, "learning_rate": 4.98302703968161e-08, "logits/chosen": -2.9774131774902344, "logits/rejected": -3.418776273727417, "logps/chosen": -205.2285614013672, "logps/rejected": -240.7371826171875, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": 0.7197244167327881, "rewards/margins": 3.134186267852783, "rewards/rejected": -2.414462089538574, "step": 7255 }, { "epoch": 0.84, "learning_rate": 4.9795153927191853e-08, "logits/chosen": -2.7514281272888184, "logits/rejected": -2.8240041732788086, "logps/chosen": -359.6626281738281, "logps/rejected": -293.65380859375, "loss": 0.1972, "rewards/accuracies": 0.875, "rewards/chosen": -0.07779745012521744, "rewards/margins": 3.210418701171875, "rewards/rejected": -3.2882163524627686, "step": 7256 }, { "epoch": 0.84, "learning_rate": 4.9760037457567594e-08, "logits/chosen": -2.7410988807678223, "logits/rejected": -2.9195165634155273, "logps/chosen": -254.07411193847656, "logps/rejected": -241.51846313476562, "loss": 0.396, "rewards/accuracies": 0.875, "rewards/chosen": -0.21580596268177032, "rewards/margins": 1.2213687896728516, "rewards/rejected": -1.437174677848816, "step": 7257 }, { "epoch": 0.84, "learning_rate": 4.972492098794335e-08, "logits/chosen": -2.988142728805542, "logits/rejected": -2.8688008785247803, "logps/chosen": -307.84442138671875, "logps/rejected": -313.1390380859375, "loss": 0.1684, "rewards/accuracies": 1.0, "rewards/chosen": 0.32012486457824707, "rewards/margins": 2.4511961936950684, "rewards/rejected": -2.1310713291168213, "step": 7258 }, { "epoch": 0.84, "learning_rate": 4.968980451831909e-08, "logits/chosen": -3.0733094215393066, "logits/rejected": -3.0168542861938477, "logps/chosen": -218.33883666992188, "logps/rejected": -149.92886352539062, "loss": 0.5113, "rewards/accuracies": 0.875, "rewards/chosen": -0.2758692800998688, "rewards/margins": 0.5334983468055725, "rewards/rejected": -0.8093676567077637, "step": 7259 }, { "epoch": 0.84, "learning_rate": 4.9654688048694836e-08, "logits/chosen": -2.2442948818206787, "logits/rejected": -2.142073631286621, "logps/chosen": -206.916259765625, "logps/rejected": -249.69842529296875, "loss": 0.3911, "rewards/accuracies": 0.875, "rewards/chosen": -0.015127800405025482, "rewards/margins": 1.2381527423858643, "rewards/rejected": -1.2532804012298584, "step": 7260 }, { "epoch": 0.84, "learning_rate": 4.9619571579070576e-08, "logits/chosen": -2.050347328186035, "logits/rejected": -1.8694854974746704, "logps/chosen": -292.9566955566406, "logps/rejected": -397.5107421875, "loss": 0.4842, "rewards/accuracies": 0.75, "rewards/chosen": 0.06792230159044266, "rewards/margins": 1.1243079900741577, "rewards/rejected": -1.0563856363296509, "step": 7261 }, { "epoch": 0.84, "learning_rate": 4.958445510944633e-08, "logits/chosen": -3.5581984519958496, "logits/rejected": -3.472510814666748, "logps/chosen": -292.0977783203125, "logps/rejected": -161.60052490234375, "loss": 0.4383, "rewards/accuracies": 0.75, "rewards/chosen": -0.8429885506629944, "rewards/margins": 1.1656779050827026, "rewards/rejected": -2.008666515350342, "step": 7262 }, { "epoch": 0.84, "learning_rate": 4.954933863982207e-08, "logits/chosen": -2.56957745552063, "logits/rejected": -2.385180711746216, "logps/chosen": -490.3033447265625, "logps/rejected": -488.3107604980469, "loss": 0.3647, "rewards/accuracies": 0.75, "rewards/chosen": 0.5026763081550598, "rewards/margins": 1.6816668510437012, "rewards/rejected": -1.1789904832839966, "step": 7263 }, { "epoch": 0.84, "learning_rate": 4.9514222170197825e-08, "logits/chosen": -2.736356019973755, "logits/rejected": -2.840841293334961, "logps/chosen": -332.1766662597656, "logps/rejected": -302.17535400390625, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": -0.38550567626953125, "rewards/margins": 1.1064305305480957, "rewards/rejected": -1.491936206817627, "step": 7264 }, { "epoch": 0.84, "learning_rate": 4.9479105700573565e-08, "logits/chosen": -3.0711867809295654, "logits/rejected": -2.6153929233551025, "logps/chosen": -309.967529296875, "logps/rejected": -262.7599792480469, "loss": 0.3124, "rewards/accuracies": 0.875, "rewards/chosen": 0.18580739200115204, "rewards/margins": 2.1398074626922607, "rewards/rejected": -1.9540001153945923, "step": 7265 }, { "epoch": 0.84, "learning_rate": 4.944398923094931e-08, "logits/chosen": -2.8674166202545166, "logits/rejected": -2.814300298690796, "logps/chosen": -228.36959838867188, "logps/rejected": -248.02850341796875, "loss": 0.3661, "rewards/accuracies": 0.75, "rewards/chosen": -0.19352349638938904, "rewards/margins": 1.734015703201294, "rewards/rejected": -1.927539348602295, "step": 7266 }, { "epoch": 0.84, "learning_rate": 4.940887276132506e-08, "logits/chosen": -2.610485792160034, "logits/rejected": -2.459897756576538, "logps/chosen": -257.58111572265625, "logps/rejected": -268.7911071777344, "loss": 0.5117, "rewards/accuracies": 0.625, "rewards/chosen": -0.0519716739654541, "rewards/margins": 1.9205138683319092, "rewards/rejected": -1.9724856615066528, "step": 7267 }, { "epoch": 0.84, "learning_rate": 4.937375629170081e-08, "logits/chosen": -3.2745320796966553, "logits/rejected": -3.3858420848846436, "logps/chosen": -138.09228515625, "logps/rejected": -228.33609008789062, "loss": 0.7236, "rewards/accuracies": 0.5, "rewards/chosen": -0.8138493299484253, "rewards/margins": 0.8385409116744995, "rewards/rejected": -1.6523903608322144, "step": 7268 }, { "epoch": 0.84, "learning_rate": 4.933863982207655e-08, "logits/chosen": -2.5273661613464355, "logits/rejected": -2.674689531326294, "logps/chosen": -345.9493103027344, "logps/rejected": -266.034423828125, "loss": 0.7288, "rewards/accuracies": 0.75, "rewards/chosen": 0.020972460508346558, "rewards/margins": 1.1111276149749756, "rewards/rejected": -1.0901551246643066, "step": 7269 }, { "epoch": 0.84, "learning_rate": 4.93035233524523e-08, "logits/chosen": -3.4082188606262207, "logits/rejected": -3.466184377670288, "logps/chosen": -235.87144470214844, "logps/rejected": -366.4646301269531, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -0.4190317392349243, "rewards/margins": 0.8734185695648193, "rewards/rejected": -1.2924504280090332, "step": 7270 }, { "epoch": 0.84, "learning_rate": 4.926840688282804e-08, "logits/chosen": -2.5614054203033447, "logits/rejected": -2.43047833442688, "logps/chosen": -152.64076232910156, "logps/rejected": -233.89634704589844, "loss": 0.339, "rewards/accuracies": 0.875, "rewards/chosen": 0.011704586446285248, "rewards/margins": 1.699904441833496, "rewards/rejected": -1.688199758529663, "step": 7271 }, { "epoch": 0.84, "learning_rate": 4.9233290413203796e-08, "logits/chosen": -3.010301113128662, "logits/rejected": -3.005540609359741, "logps/chosen": -157.85079956054688, "logps/rejected": -173.25262451171875, "loss": 0.4327, "rewards/accuracies": 0.75, "rewards/chosen": 0.1290259063243866, "rewards/margins": 1.1687648296356201, "rewards/rejected": -1.0397388935089111, "step": 7272 }, { "epoch": 0.84, "learning_rate": 4.9198173943579536e-08, "logits/chosen": -3.093505859375, "logits/rejected": -3.0845139026641846, "logps/chosen": -255.22900390625, "logps/rejected": -300.4136657714844, "loss": 0.4937, "rewards/accuracies": 0.625, "rewards/chosen": 0.18153756856918335, "rewards/margins": 1.9888190031051636, "rewards/rejected": -1.807281494140625, "step": 7273 }, { "epoch": 0.84, "learning_rate": 4.9163057473955284e-08, "logits/chosen": -2.9446828365325928, "logits/rejected": -2.702209949493408, "logps/chosen": -218.6045379638672, "logps/rejected": -396.9185791015625, "loss": 1.0241, "rewards/accuracies": 0.5, "rewards/chosen": -0.9380279779434204, "rewards/margins": -0.0010988116264343262, "rewards/rejected": -0.9369291067123413, "step": 7274 }, { "epoch": 0.84, "learning_rate": 4.912794100433103e-08, "logits/chosen": -3.0684814453125, "logits/rejected": -3.1150918006896973, "logps/chosen": -281.46478271484375, "logps/rejected": -408.67950439453125, "loss": 0.1734, "rewards/accuracies": 1.0, "rewards/chosen": -0.04981401562690735, "rewards/margins": 2.914843797683716, "rewards/rejected": -2.964657783508301, "step": 7275 }, { "epoch": 0.84, "learning_rate": 4.909282453470678e-08, "logits/chosen": -3.40449857711792, "logits/rejected": -3.3501219749450684, "logps/chosen": -210.17259216308594, "logps/rejected": -248.86895751953125, "loss": 0.1804, "rewards/accuracies": 1.0, "rewards/chosen": -0.07876773178577423, "rewards/margins": 2.6153295040130615, "rewards/rejected": -2.6940970420837402, "step": 7276 }, { "epoch": 0.84, "learning_rate": 4.905770806508252e-08, "logits/chosen": -2.9393742084503174, "logits/rejected": -3.0858113765716553, "logps/chosen": -245.57765197753906, "logps/rejected": -315.68597412109375, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": -0.17368200421333313, "rewards/margins": 2.679633617401123, "rewards/rejected": -2.853315830230713, "step": 7277 }, { "epoch": 0.84, "learning_rate": 4.902259159545827e-08, "logits/chosen": -3.411923885345459, "logits/rejected": -3.452000617980957, "logps/chosen": -166.6035614013672, "logps/rejected": -238.7212677001953, "loss": 0.1763, "rewards/accuracies": 1.0, "rewards/chosen": 0.08537646383047104, "rewards/margins": 3.3329763412475586, "rewards/rejected": -3.2475998401641846, "step": 7278 }, { "epoch": 0.84, "learning_rate": 4.898747512583401e-08, "logits/chosen": -2.8583695888519287, "logits/rejected": -3.092001438140869, "logps/chosen": -168.74026489257812, "logps/rejected": -158.2676544189453, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": 0.0483776330947876, "rewards/margins": 2.0308406352996826, "rewards/rejected": -1.9824631214141846, "step": 7279 }, { "epoch": 0.84, "learning_rate": 4.895235865620976e-08, "logits/chosen": -3.1074063777923584, "logits/rejected": -3.254213333129883, "logps/chosen": -276.8778076171875, "logps/rejected": -323.763671875, "loss": 0.2042, "rewards/accuracies": 1.0, "rewards/chosen": 0.27058011293411255, "rewards/margins": 2.574982166290283, "rewards/rejected": -2.3044021129608154, "step": 7280 }, { "epoch": 0.84, "learning_rate": 4.891724218658551e-08, "logits/chosen": -3.087440252304077, "logits/rejected": -3.165618419647217, "logps/chosen": -226.5810546875, "logps/rejected": -188.57327270507812, "loss": 0.8235, "rewards/accuracies": 0.625, "rewards/chosen": -1.1311836242675781, "rewards/margins": 0.38190758228302, "rewards/rejected": -1.5130912065505981, "step": 7281 }, { "epoch": 0.84, "learning_rate": 4.8882125716961255e-08, "logits/chosen": -2.828007936477661, "logits/rejected": -2.893136501312256, "logps/chosen": -198.54664611816406, "logps/rejected": -234.611572265625, "loss": 0.381, "rewards/accuracies": 0.875, "rewards/chosen": 0.3042939305305481, "rewards/margins": 1.5410435199737549, "rewards/rejected": -1.2367496490478516, "step": 7282 }, { "epoch": 0.84, "learning_rate": 4.8847009247336996e-08, "logits/chosen": -3.4766130447387695, "logits/rejected": -3.467379093170166, "logps/chosen": -204.6961669921875, "logps/rejected": -179.06390380859375, "loss": 0.4449, "rewards/accuracies": 0.75, "rewards/chosen": -0.18237724900245667, "rewards/margins": 3.2531626224517822, "rewards/rejected": -3.435539960861206, "step": 7283 }, { "epoch": 0.84, "learning_rate": 4.881189277771275e-08, "logits/chosen": -2.37133526802063, "logits/rejected": -2.424757480621338, "logps/chosen": -399.0733642578125, "logps/rejected": -490.33734130859375, "loss": 0.4331, "rewards/accuracies": 0.875, "rewards/chosen": 0.2209317982196808, "rewards/margins": 1.3591325283050537, "rewards/rejected": -1.1382006406784058, "step": 7284 }, { "epoch": 0.84, "learning_rate": 4.877677630808849e-08, "logits/chosen": -3.3877601623535156, "logits/rejected": -3.4637575149536133, "logps/chosen": -232.30972290039062, "logps/rejected": -279.6500244140625, "loss": 0.7722, "rewards/accuracies": 0.75, "rewards/chosen": -0.35711830854415894, "rewards/margins": 0.843306303024292, "rewards/rejected": -1.2004246711730957, "step": 7285 }, { "epoch": 0.84, "learning_rate": 4.8741659838464244e-08, "logits/chosen": -3.00357723236084, "logits/rejected": -3.234877586364746, "logps/chosen": -200.8505859375, "logps/rejected": -272.5226745605469, "loss": 0.3513, "rewards/accuracies": 0.75, "rewards/chosen": -0.1436670422554016, "rewards/margins": 1.5940016508102417, "rewards/rejected": -1.7376686334609985, "step": 7286 }, { "epoch": 0.84, "learning_rate": 4.8706543368839985e-08, "logits/chosen": -3.370555877685547, "logits/rejected": -3.378140926361084, "logps/chosen": -176.77455139160156, "logps/rejected": -184.7077178955078, "loss": 0.425, "rewards/accuracies": 0.875, "rewards/chosen": -0.16141854226589203, "rewards/margins": 1.2648391723632812, "rewards/rejected": -1.426257848739624, "step": 7287 }, { "epoch": 0.84, "learning_rate": 4.867142689921573e-08, "logits/chosen": -3.794935941696167, "logits/rejected": -3.626936912536621, "logps/chosen": -440.64837646484375, "logps/rejected": -316.0798034667969, "loss": 0.491, "rewards/accuracies": 0.75, "rewards/chosen": -0.13842487335205078, "rewards/margins": 1.6401281356811523, "rewards/rejected": -1.7785531282424927, "step": 7288 }, { "epoch": 0.84, "learning_rate": 4.863631042959148e-08, "logits/chosen": -3.341630697250366, "logits/rejected": -3.200620651245117, "logps/chosen": -169.20860290527344, "logps/rejected": -151.58544921875, "loss": 0.3727, "rewards/accuracies": 0.875, "rewards/chosen": -0.3285248279571533, "rewards/margins": 2.000237464904785, "rewards/rejected": -2.3287620544433594, "step": 7289 }, { "epoch": 0.84, "learning_rate": 4.860119395996722e-08, "logits/chosen": -2.6627156734466553, "logits/rejected": -2.53596568107605, "logps/chosen": -468.583251953125, "logps/rejected": -315.4981689453125, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": 0.08667278289794922, "rewards/margins": 2.6280128955841064, "rewards/rejected": -2.5413401126861572, "step": 7290 }, { "epoch": 0.84, "learning_rate": 4.856607749034297e-08, "logits/chosen": -2.7861886024475098, "logits/rejected": -2.581777572631836, "logps/chosen": -432.36114501953125, "logps/rejected": -369.5299987792969, "loss": 0.3895, "rewards/accuracies": 0.875, "rewards/chosen": -0.167967289686203, "rewards/margins": 1.6322658061981201, "rewards/rejected": -1.800233006477356, "step": 7291 }, { "epoch": 0.84, "learning_rate": 4.8530961020718714e-08, "logits/chosen": -2.9562318325042725, "logits/rejected": -2.994007110595703, "logps/chosen": -248.79071044921875, "logps/rejected": -306.63031005859375, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": 0.9452049732208252, "rewards/margins": 3.174410581588745, "rewards/rejected": -2.22920560836792, "step": 7292 }, { "epoch": 0.84, "learning_rate": 4.849584455109446e-08, "logits/chosen": -2.2344038486480713, "logits/rejected": -2.411007881164551, "logps/chosen": -436.69000244140625, "logps/rejected": -193.89605712890625, "loss": 0.2749, "rewards/accuracies": 0.875, "rewards/chosen": -0.032880473881959915, "rewards/margins": 2.6431002616882324, "rewards/rejected": -2.675980806350708, "step": 7293 }, { "epoch": 0.84, "learning_rate": 4.84607280814702e-08, "logits/chosen": -2.9807276725769043, "logits/rejected": -3.4131040573120117, "logps/chosen": -138.2904510498047, "logps/rejected": -371.0430908203125, "loss": 0.4197, "rewards/accuracies": 0.875, "rewards/chosen": -0.21740078926086426, "rewards/margins": 2.3061039447784424, "rewards/rejected": -2.5235044956207275, "step": 7294 }, { "epoch": 0.84, "learning_rate": 4.8425611611845956e-08, "logits/chosen": -3.3203837871551514, "logits/rejected": -3.5049076080322266, "logps/chosen": -315.8813781738281, "logps/rejected": -333.078369140625, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": 0.7096199989318848, "rewards/margins": 3.622614860534668, "rewards/rejected": -2.912994861602783, "step": 7295 }, { "epoch": 0.84, "learning_rate": 4.8390495142221696e-08, "logits/chosen": -3.440485715866089, "logits/rejected": -3.521178722381592, "logps/chosen": -335.42938232421875, "logps/rejected": -489.822021484375, "loss": 0.1528, "rewards/accuracies": 1.0, "rewards/chosen": 0.08731476217508316, "rewards/margins": 4.07585334777832, "rewards/rejected": -3.9885382652282715, "step": 7296 }, { "epoch": 0.84, "learning_rate": 4.8355378672597444e-08, "logits/chosen": -3.420790195465088, "logits/rejected": -3.415738105773926, "logps/chosen": -440.493408203125, "logps/rejected": -245.51412963867188, "loss": 0.3192, "rewards/accuracies": 0.875, "rewards/chosen": 0.08056928217411041, "rewards/margins": 2.0284018516540527, "rewards/rejected": -1.947832465171814, "step": 7297 }, { "epoch": 0.84, "learning_rate": 4.832026220297319e-08, "logits/chosen": -3.185253381729126, "logits/rejected": -2.7340140342712402, "logps/chosen": -226.18995666503906, "logps/rejected": -158.39584350585938, "loss": 0.3169, "rewards/accuracies": 1.0, "rewards/chosen": -0.198471799492836, "rewards/margins": 1.2369322776794434, "rewards/rejected": -1.4354041814804077, "step": 7298 }, { "epoch": 0.84, "learning_rate": 4.828514573334894e-08, "logits/chosen": -3.8645191192626953, "logits/rejected": -3.8011977672576904, "logps/chosen": -237.42343139648438, "logps/rejected": -187.81903076171875, "loss": 0.7474, "rewards/accuracies": 0.75, "rewards/chosen": -0.7465457916259766, "rewards/margins": 0.8199043273925781, "rewards/rejected": -1.5664501190185547, "step": 7299 }, { "epoch": 0.84, "learning_rate": 4.825002926372468e-08, "logits/chosen": -3.102614402770996, "logits/rejected": -2.942016363143921, "logps/chosen": -221.24737548828125, "logps/rejected": -381.0291442871094, "loss": 0.3752, "rewards/accuracies": 0.875, "rewards/chosen": -0.28374022245407104, "rewards/margins": 3.445960521697998, "rewards/rejected": -3.7297005653381348, "step": 7300 }, { "epoch": 0.84, "learning_rate": 4.821491279410043e-08, "logits/chosen": -3.061697006225586, "logits/rejected": -3.1062002182006836, "logps/chosen": -297.1098937988281, "logps/rejected": -318.64300537109375, "loss": 0.3203, "rewards/accuracies": 0.75, "rewards/chosen": -0.6267428994178772, "rewards/margins": 2.014457941055298, "rewards/rejected": -2.641200542449951, "step": 7301 }, { "epoch": 0.84, "learning_rate": 4.817979632447617e-08, "logits/chosen": -2.8429322242736816, "logits/rejected": -2.9778459072113037, "logps/chosen": -142.3765106201172, "logps/rejected": -174.58303833007812, "loss": 0.6632, "rewards/accuracies": 0.625, "rewards/chosen": -0.6912792325019836, "rewards/margins": 0.4836684465408325, "rewards/rejected": -1.174947738647461, "step": 7302 }, { "epoch": 0.84, "learning_rate": 4.814467985485193e-08, "logits/chosen": -2.9730589389801025, "logits/rejected": -3.104862689971924, "logps/chosen": -320.78143310546875, "logps/rejected": -291.5182800292969, "loss": 0.2316, "rewards/accuracies": 1.0, "rewards/chosen": 0.03268962353467941, "rewards/margins": 2.9554500579833984, "rewards/rejected": -2.922760486602783, "step": 7303 }, { "epoch": 0.84, "learning_rate": 4.810956338522767e-08, "logits/chosen": -3.9333295822143555, "logits/rejected": -3.93192195892334, "logps/chosen": -461.83172607421875, "logps/rejected": -414.620361328125, "loss": 0.5247, "rewards/accuracies": 0.75, "rewards/chosen": -0.48872315883636475, "rewards/margins": 1.8870216608047485, "rewards/rejected": -2.3757448196411133, "step": 7304 }, { "epoch": 0.84, "learning_rate": 4.8074446915603415e-08, "logits/chosen": -3.002988815307617, "logits/rejected": -3.426597833633423, "logps/chosen": -287.8871765136719, "logps/rejected": -379.8193054199219, "loss": 0.1929, "rewards/accuracies": 0.875, "rewards/chosen": 0.281061053276062, "rewards/margins": 4.356834411621094, "rewards/rejected": -4.0757737159729, "step": 7305 }, { "epoch": 0.84, "learning_rate": 4.803933044597916e-08, "logits/chosen": -3.575083017349243, "logits/rejected": -3.5665464401245117, "logps/chosen": -159.91993713378906, "logps/rejected": -201.2017059326172, "loss": 0.2467, "rewards/accuracies": 0.875, "rewards/chosen": 0.5654672384262085, "rewards/margins": 2.040109634399414, "rewards/rejected": -1.4746425151824951, "step": 7306 }, { "epoch": 0.84, "learning_rate": 4.800421397635491e-08, "logits/chosen": -2.8793303966522217, "logits/rejected": -3.108724594116211, "logps/chosen": -243.35684204101562, "logps/rejected": -205.84352111816406, "loss": 0.2111, "rewards/accuracies": 0.875, "rewards/chosen": -0.1350824385881424, "rewards/margins": 2.2993600368499756, "rewards/rejected": -2.4344425201416016, "step": 7307 }, { "epoch": 0.84, "learning_rate": 4.796909750673065e-08, "logits/chosen": -3.401247262954712, "logits/rejected": -3.3550868034362793, "logps/chosen": -128.42762756347656, "logps/rejected": -343.7008056640625, "loss": 0.3172, "rewards/accuracies": 0.875, "rewards/chosen": 0.06420469284057617, "rewards/margins": 3.055263042449951, "rewards/rejected": -2.991058349609375, "step": 7308 }, { "epoch": 0.84, "learning_rate": 4.7933981037106404e-08, "logits/chosen": -3.4297101497650146, "logits/rejected": -3.5788333415985107, "logps/chosen": -197.015869140625, "logps/rejected": -186.0310821533203, "loss": 0.4897, "rewards/accuracies": 0.75, "rewards/chosen": -0.25712335109710693, "rewards/margins": 1.947446346282959, "rewards/rejected": -2.2045698165893555, "step": 7309 }, { "epoch": 0.84, "learning_rate": 4.7898864567482145e-08, "logits/chosen": -3.027949571609497, "logits/rejected": -3.2580952644348145, "logps/chosen": -182.4051055908203, "logps/rejected": -169.75430297851562, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": 0.4217607080936432, "rewards/margins": 1.1751235723495483, "rewards/rejected": -0.753362774848938, "step": 7310 }, { "epoch": 0.84, "learning_rate": 4.786374809785789e-08, "logits/chosen": -4.005672931671143, "logits/rejected": -3.927506923675537, "logps/chosen": -179.89671325683594, "logps/rejected": -259.1134338378906, "loss": 0.7326, "rewards/accuracies": 0.375, "rewards/chosen": -0.4258078634738922, "rewards/margins": 0.3502572178840637, "rewards/rejected": -0.7760651111602783, "step": 7311 }, { "epoch": 0.84, "learning_rate": 4.782863162823364e-08, "logits/chosen": -3.840287208557129, "logits/rejected": -3.5483999252319336, "logps/chosen": -169.66378784179688, "logps/rejected": -139.97280883789062, "loss": 0.3329, "rewards/accuracies": 0.75, "rewards/chosen": -0.12246038019657135, "rewards/margins": 1.8799288272857666, "rewards/rejected": -2.0023891925811768, "step": 7312 }, { "epoch": 0.84, "learning_rate": 4.7793515158609386e-08, "logits/chosen": -3.4007012844085693, "logits/rejected": -3.5846152305603027, "logps/chosen": -122.65921020507812, "logps/rejected": -274.78289794921875, "loss": 0.2426, "rewards/accuracies": 0.875, "rewards/chosen": -0.20147854089736938, "rewards/margins": 2.8531172275543213, "rewards/rejected": -3.054595947265625, "step": 7313 }, { "epoch": 0.84, "learning_rate": 4.775839868898513e-08, "logits/chosen": -2.6804275512695312, "logits/rejected": -2.589238405227661, "logps/chosen": -202.95384216308594, "logps/rejected": -221.0663299560547, "loss": 0.5247, "rewards/accuracies": 0.75, "rewards/chosen": -0.16295230388641357, "rewards/margins": 0.8561410903930664, "rewards/rejected": -1.01909339427948, "step": 7314 }, { "epoch": 0.84, "learning_rate": 4.772328221936088e-08, "logits/chosen": -2.763871431350708, "logits/rejected": -2.8507299423217773, "logps/chosen": -180.61026000976562, "logps/rejected": -250.99374389648438, "loss": 0.2902, "rewards/accuracies": 0.75, "rewards/chosen": -0.09512703120708466, "rewards/margins": 3.4204747676849365, "rewards/rejected": -3.515601634979248, "step": 7315 }, { "epoch": 0.84, "learning_rate": 4.768816574973662e-08, "logits/chosen": -3.0319623947143555, "logits/rejected": -2.837456226348877, "logps/chosen": -338.229248046875, "logps/rejected": -396.53668212890625, "loss": 0.4465, "rewards/accuracies": 0.875, "rewards/chosen": 0.10290517657995224, "rewards/margins": 1.1387763023376465, "rewards/rejected": -1.0358712673187256, "step": 7316 }, { "epoch": 0.84, "learning_rate": 4.7653049280112375e-08, "logits/chosen": -3.8190183639526367, "logits/rejected": -3.68247652053833, "logps/chosen": -212.0687255859375, "logps/rejected": -266.50396728515625, "loss": 0.4491, "rewards/accuracies": 0.875, "rewards/chosen": -0.11737871170043945, "rewards/margins": 2.275092601776123, "rewards/rejected": -2.3924717903137207, "step": 7317 }, { "epoch": 0.84, "learning_rate": 4.7617932810488116e-08, "logits/chosen": -2.8385801315307617, "logits/rejected": -2.957085371017456, "logps/chosen": -278.64447021484375, "logps/rejected": -184.35968017578125, "loss": 0.6205, "rewards/accuracies": 0.75, "rewards/chosen": -0.6117500066757202, "rewards/margins": 0.6758273243904114, "rewards/rejected": -1.2875773906707764, "step": 7318 }, { "epoch": 0.84, "learning_rate": 4.758281634086386e-08, "logits/chosen": -3.0211052894592285, "logits/rejected": -3.087735176086426, "logps/chosen": -105.45380401611328, "logps/rejected": -150.23008728027344, "loss": 0.5139, "rewards/accuracies": 0.75, "rewards/chosen": -0.1231069341301918, "rewards/margins": 0.9202936291694641, "rewards/rejected": -1.043400526046753, "step": 7319 }, { "epoch": 0.84, "learning_rate": 4.754769987123961e-08, "logits/chosen": -3.249913215637207, "logits/rejected": -2.991976261138916, "logps/chosen": -324.0323486328125, "logps/rejected": -385.9041748046875, "loss": 0.5114, "rewards/accuracies": 0.875, "rewards/chosen": 0.45750170946121216, "rewards/margins": 1.5237070322036743, "rewards/rejected": -1.0662052631378174, "step": 7320 }, { "epoch": 0.84, "learning_rate": 4.751258340161536e-08, "logits/chosen": -3.225053071975708, "logits/rejected": -3.1735119819641113, "logps/chosen": -186.2451171875, "logps/rejected": -172.99380493164062, "loss": 0.3687, "rewards/accuracies": 0.75, "rewards/chosen": -0.17560851573944092, "rewards/margins": 1.128713846206665, "rewards/rejected": -1.304322361946106, "step": 7321 }, { "epoch": 0.84, "learning_rate": 4.74774669319911e-08, "logits/chosen": -2.422070264816284, "logits/rejected": -2.704610824584961, "logps/chosen": -254.8570556640625, "logps/rejected": -208.01219177246094, "loss": 0.5933, "rewards/accuracies": 0.625, "rewards/chosen": -0.2295866161584854, "rewards/margins": 0.40061384439468384, "rewards/rejected": -0.6302005052566528, "step": 7322 }, { "epoch": 0.84, "learning_rate": 4.744235046236685e-08, "logits/chosen": -2.7399778366088867, "logits/rejected": -2.8843202590942383, "logps/chosen": -195.7682647705078, "logps/rejected": -245.11338806152344, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": -0.24796563386917114, "rewards/margins": 1.8631551265716553, "rewards/rejected": -2.1111207008361816, "step": 7323 }, { "epoch": 0.84, "learning_rate": 4.740723399274259e-08, "logits/chosen": -3.283493757247925, "logits/rejected": -3.4087436199188232, "logps/chosen": -104.8115234375, "logps/rejected": -119.67205810546875, "loss": 0.2859, "rewards/accuracies": 0.875, "rewards/chosen": 0.3326238989830017, "rewards/margins": 1.4431123733520508, "rewards/rejected": -1.1104885339736938, "step": 7324 }, { "epoch": 0.84, "learning_rate": 4.7372117523118347e-08, "logits/chosen": -3.4501538276672363, "logits/rejected": -2.9166085720062256, "logps/chosen": -423.3934020996094, "logps/rejected": -302.6856689453125, "loss": 0.3584, "rewards/accuracies": 0.75, "rewards/chosen": 0.28087443113327026, "rewards/margins": 2.6533474922180176, "rewards/rejected": -2.3724732398986816, "step": 7325 }, { "epoch": 0.84, "learning_rate": 4.733700105349409e-08, "logits/chosen": -3.491476535797119, "logits/rejected": -3.1719436645507812, "logps/chosen": -219.1077880859375, "logps/rejected": -207.60372924804688, "loss": 0.6018, "rewards/accuracies": 0.75, "rewards/chosen": -0.4429379105567932, "rewards/margins": 1.3127529621124268, "rewards/rejected": -1.7556910514831543, "step": 7326 }, { "epoch": 0.84, "learning_rate": 4.7301884583869834e-08, "logits/chosen": -2.8877086639404297, "logits/rejected": -2.5626325607299805, "logps/chosen": -231.3386993408203, "logps/rejected": -170.88784790039062, "loss": 0.2943, "rewards/accuracies": 0.75, "rewards/chosen": -0.08883452415466309, "rewards/margins": 1.6548713445663452, "rewards/rejected": -1.7437058687210083, "step": 7327 }, { "epoch": 0.84, "learning_rate": 4.7266768114245575e-08, "logits/chosen": -3.6558258533477783, "logits/rejected": -3.944817304611206, "logps/chosen": -114.43352508544922, "logps/rejected": -219.16766357421875, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -0.1752062737941742, "rewards/margins": 2.490837812423706, "rewards/rejected": -2.6660444736480713, "step": 7328 }, { "epoch": 0.84, "learning_rate": 4.723165164462133e-08, "logits/chosen": -3.315973997116089, "logits/rejected": -3.5065174102783203, "logps/chosen": -278.18060302734375, "logps/rejected": -295.3574523925781, "loss": 0.3742, "rewards/accuracies": 0.875, "rewards/chosen": -0.29532575607299805, "rewards/margins": 1.7133129835128784, "rewards/rejected": -2.008638858795166, "step": 7329 }, { "epoch": 0.85, "learning_rate": 4.719653517499707e-08, "logits/chosen": -3.7902326583862305, "logits/rejected": -3.832307815551758, "logps/chosen": -271.6027526855469, "logps/rejected": -314.26898193359375, "loss": 0.2584, "rewards/accuracies": 0.75, "rewards/chosen": 0.16282454133033752, "rewards/margins": 3.438626527786255, "rewards/rejected": -3.2758021354675293, "step": 7330 }, { "epoch": 0.85, "learning_rate": 4.7161418705372823e-08, "logits/chosen": -3.0126850605010986, "logits/rejected": -3.073497772216797, "logps/chosen": -359.3127746582031, "logps/rejected": -323.47991943359375, "loss": 0.2408, "rewards/accuracies": 1.0, "rewards/chosen": 0.5288572311401367, "rewards/margins": 1.8935892581939697, "rewards/rejected": -1.364732027053833, "step": 7331 }, { "epoch": 0.85, "learning_rate": 4.7126302235748564e-08, "logits/chosen": -3.708963394165039, "logits/rejected": -3.5330350399017334, "logps/chosen": -256.1639099121094, "logps/rejected": -335.654296875, "loss": 0.3016, "rewards/accuracies": 0.75, "rewards/chosen": 0.23040857911109924, "rewards/margins": 1.8226168155670166, "rewards/rejected": -1.5922081470489502, "step": 7332 }, { "epoch": 0.85, "learning_rate": 4.709118576612431e-08, "logits/chosen": -2.91880464553833, "logits/rejected": -3.4834585189819336, "logps/chosen": -191.68170166015625, "logps/rejected": -208.81973266601562, "loss": 0.7786, "rewards/accuracies": 0.625, "rewards/chosen": -0.8515530824661255, "rewards/margins": 0.48204243183135986, "rewards/rejected": -1.3335955142974854, "step": 7333 }, { "epoch": 0.85, "learning_rate": 4.705606929650006e-08, "logits/chosen": -2.8397560119628906, "logits/rejected": -3.1333260536193848, "logps/chosen": -126.1759033203125, "logps/rejected": -219.0943603515625, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": -0.3072376847267151, "rewards/margins": 1.2899103164672852, "rewards/rejected": -1.5971479415893555, "step": 7334 }, { "epoch": 0.85, "learning_rate": 4.7020952826875806e-08, "logits/chosen": -2.8610587120056152, "logits/rejected": -3.2174344062805176, "logps/chosen": -317.0050048828125, "logps/rejected": -224.5142822265625, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": 0.6875356435775757, "rewards/margins": 1.741040825843811, "rewards/rejected": -1.0535050630569458, "step": 7335 }, { "epoch": 0.85, "learning_rate": 4.6985836357251546e-08, "logits/chosen": -2.957298994064331, "logits/rejected": -3.1131653785705566, "logps/chosen": -213.8180694580078, "logps/rejected": -217.9388427734375, "loss": 0.4836, "rewards/accuracies": 0.875, "rewards/chosen": -0.0843716412782669, "rewards/margins": 1.8703190088272095, "rewards/rejected": -1.95469069480896, "step": 7336 }, { "epoch": 0.85, "learning_rate": 4.69507198876273e-08, "logits/chosen": -3.2588839530944824, "logits/rejected": -3.5889248847961426, "logps/chosen": -254.72283935546875, "logps/rejected": -355.9941101074219, "loss": 0.2524, "rewards/accuracies": 0.875, "rewards/chosen": -0.30633866786956787, "rewards/margins": 2.3448753356933594, "rewards/rejected": -2.6512138843536377, "step": 7337 }, { "epoch": 0.85, "learning_rate": 4.691560341800304e-08, "logits/chosen": -2.3245861530303955, "logits/rejected": -2.4936394691467285, "logps/chosen": -371.86859130859375, "logps/rejected": -280.9288330078125, "loss": 0.5391, "rewards/accuracies": 0.75, "rewards/chosen": 0.3915713429450989, "rewards/margins": 1.0539168119430542, "rewards/rejected": -0.6623454689979553, "step": 7338 }, { "epoch": 0.85, "learning_rate": 4.688048694837878e-08, "logits/chosen": -2.890493392944336, "logits/rejected": -2.829853057861328, "logps/chosen": -244.71060180664062, "logps/rejected": -300.8856201171875, "loss": 0.1957, "rewards/accuracies": 1.0, "rewards/chosen": 0.11171610653400421, "rewards/margins": 3.6254050731658936, "rewards/rejected": -3.5136890411376953, "step": 7339 }, { "epoch": 0.85, "learning_rate": 4.6845370478754535e-08, "logits/chosen": -3.243692398071289, "logits/rejected": -3.4760990142822266, "logps/chosen": -164.147216796875, "logps/rejected": -215.63995361328125, "loss": 0.2067, "rewards/accuracies": 1.0, "rewards/chosen": 0.10448744148015976, "rewards/margins": 4.02479362487793, "rewards/rejected": -3.9203059673309326, "step": 7340 }, { "epoch": 0.85, "learning_rate": 4.6810254009130276e-08, "logits/chosen": -3.750148296356201, "logits/rejected": -3.697678327560425, "logps/chosen": -204.79827880859375, "logps/rejected": -261.7188720703125, "loss": 0.6503, "rewards/accuracies": 0.5, "rewards/chosen": -1.047427773475647, "rewards/margins": 1.7234405279159546, "rewards/rejected": -2.7708683013916016, "step": 7341 }, { "epoch": 0.85, "learning_rate": 4.677513753950603e-08, "logits/chosen": -2.7344017028808594, "logits/rejected": -2.74267578125, "logps/chosen": -344.7205505371094, "logps/rejected": -223.904296875, "loss": 0.2918, "rewards/accuracies": 0.75, "rewards/chosen": -0.018908776342868805, "rewards/margins": 2.380125045776367, "rewards/rejected": -2.399034023284912, "step": 7342 }, { "epoch": 0.85, "learning_rate": 4.674002106988177e-08, "logits/chosen": -2.784703254699707, "logits/rejected": -2.731060743331909, "logps/chosen": -372.99859619140625, "logps/rejected": -297.0814208984375, "loss": 0.5189, "rewards/accuracies": 0.875, "rewards/chosen": 0.05833778530359268, "rewards/margins": 0.9888304471969604, "rewards/rejected": -0.930492639541626, "step": 7343 }, { "epoch": 0.85, "learning_rate": 4.670490460025752e-08, "logits/chosen": -2.8519275188446045, "logits/rejected": -2.8314380645751953, "logps/chosen": -416.91717529296875, "logps/rejected": -377.5380554199219, "loss": 0.4659, "rewards/accuracies": 0.875, "rewards/chosen": -0.5273405909538269, "rewards/margins": 1.565872073173523, "rewards/rejected": -2.093212366104126, "step": 7344 }, { "epoch": 0.85, "learning_rate": 4.666978813063326e-08, "logits/chosen": -4.028273582458496, "logits/rejected": -3.8425886631011963, "logps/chosen": -302.6734313964844, "logps/rejected": -279.5216979980469, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": 0.578534722328186, "rewards/margins": 2.5576696395874023, "rewards/rejected": -1.9791351556777954, "step": 7345 }, { "epoch": 0.85, "learning_rate": 4.663467166100901e-08, "logits/chosen": -3.1002674102783203, "logits/rejected": -3.0070433616638184, "logps/chosen": -386.769775390625, "logps/rejected": -298.45989990234375, "loss": 0.2962, "rewards/accuracies": 0.875, "rewards/chosen": 0.29456162452697754, "rewards/margins": 2.590243101119995, "rewards/rejected": -2.2956814765930176, "step": 7346 }, { "epoch": 0.85, "learning_rate": 4.659955519138475e-08, "logits/chosen": -2.9068028926849365, "logits/rejected": -2.9619979858398438, "logps/chosen": -329.574951171875, "logps/rejected": -329.071044921875, "loss": 0.3597, "rewards/accuracies": 0.875, "rewards/chosen": -0.04635202884674072, "rewards/margins": 2.993783712387085, "rewards/rejected": -3.040135622024536, "step": 7347 }, { "epoch": 0.85, "learning_rate": 4.6564438721760507e-08, "logits/chosen": -3.2784523963928223, "logits/rejected": -3.2948689460754395, "logps/chosen": -205.94784545898438, "logps/rejected": -214.25070190429688, "loss": 0.605, "rewards/accuracies": 0.75, "rewards/chosen": -0.16859325766563416, "rewards/margins": 1.6817902326583862, "rewards/rejected": -1.8503834009170532, "step": 7348 }, { "epoch": 0.85, "learning_rate": 4.652932225213625e-08, "logits/chosen": -3.048021078109741, "logits/rejected": -3.0928821563720703, "logps/chosen": -296.2345886230469, "logps/rejected": -255.06875610351562, "loss": 0.7382, "rewards/accuracies": 0.625, "rewards/chosen": -0.29918408393859863, "rewards/margins": 0.9553760290145874, "rewards/rejected": -1.254560112953186, "step": 7349 }, { "epoch": 0.85, "learning_rate": 4.6494205782511994e-08, "logits/chosen": -3.6084914207458496, "logits/rejected": -3.455488443374634, "logps/chosen": -266.482666015625, "logps/rejected": -268.52020263671875, "loss": 0.3066, "rewards/accuracies": 0.875, "rewards/chosen": -0.7531477212905884, "rewards/margins": 1.7420415878295898, "rewards/rejected": -2.4951891899108887, "step": 7350 }, { "epoch": 0.85, "learning_rate": 4.645908931288774e-08, "logits/chosen": -2.858302116394043, "logits/rejected": -2.5553460121154785, "logps/chosen": -237.10000610351562, "logps/rejected": -196.5335235595703, "loss": 0.3574, "rewards/accuracies": 1.0, "rewards/chosen": -0.12628306448459625, "rewards/margins": 0.9533749222755432, "rewards/rejected": -1.0796579122543335, "step": 7351 }, { "epoch": 0.85, "learning_rate": 4.642397284326349e-08, "logits/chosen": -2.5157790184020996, "logits/rejected": -2.3960089683532715, "logps/chosen": -240.721435546875, "logps/rejected": -247.38058471679688, "loss": 0.4024, "rewards/accuracies": 0.75, "rewards/chosen": -0.21461085975170135, "rewards/margins": 1.7407556772232056, "rewards/rejected": -1.955366611480713, "step": 7352 }, { "epoch": 0.85, "learning_rate": 4.638885637363923e-08, "logits/chosen": -3.074939250946045, "logits/rejected": -3.361172676086426, "logps/chosen": -200.81396484375, "logps/rejected": -309.76910400390625, "loss": 0.19, "rewards/accuracies": 1.0, "rewards/chosen": 0.023403527215123177, "rewards/margins": 2.3370542526245117, "rewards/rejected": -2.3136508464813232, "step": 7353 }, { "epoch": 0.85, "learning_rate": 4.6353739904014983e-08, "logits/chosen": -3.2743964195251465, "logits/rejected": -3.385770797729492, "logps/chosen": -251.82229614257812, "logps/rejected": -214.83837890625, "loss": 0.2196, "rewards/accuracies": 1.0, "rewards/chosen": -0.3394489288330078, "rewards/margins": 2.7553133964538574, "rewards/rejected": -3.0947623252868652, "step": 7354 }, { "epoch": 0.85, "learning_rate": 4.6318623434390724e-08, "logits/chosen": -3.495187997817993, "logits/rejected": -3.1620311737060547, "logps/chosen": -171.62982177734375, "logps/rejected": -202.3713836669922, "loss": 0.9129, "rewards/accuracies": 0.625, "rewards/chosen": -0.13207721710205078, "rewards/margins": 0.032562822103500366, "rewards/rejected": -0.16464009881019592, "step": 7355 }, { "epoch": 0.85, "learning_rate": 4.628350696476648e-08, "logits/chosen": -2.7904837131500244, "logits/rejected": -2.564824342727661, "logps/chosen": -348.6330261230469, "logps/rejected": -349.6911315917969, "loss": 0.3207, "rewards/accuracies": 0.75, "rewards/chosen": 0.16057689487934113, "rewards/margins": 2.106289863586426, "rewards/rejected": -1.945712924003601, "step": 7356 }, { "epoch": 0.85, "learning_rate": 4.624839049514222e-08, "logits/chosen": -3.273210287094116, "logits/rejected": -3.158374786376953, "logps/chosen": -366.6065368652344, "logps/rejected": -287.60882568359375, "loss": 0.5256, "rewards/accuracies": 0.75, "rewards/chosen": -0.5399941205978394, "rewards/margins": 1.22239351272583, "rewards/rejected": -1.762387752532959, "step": 7357 }, { "epoch": 0.85, "learning_rate": 4.6213274025517966e-08, "logits/chosen": -3.065751075744629, "logits/rejected": -3.1093544960021973, "logps/chosen": -180.89016723632812, "logps/rejected": -137.0840606689453, "loss": 0.5285, "rewards/accuracies": 0.75, "rewards/chosen": -0.4979577958583832, "rewards/margins": 0.5182552337646484, "rewards/rejected": -1.016213059425354, "step": 7358 }, { "epoch": 0.85, "learning_rate": 4.617815755589371e-08, "logits/chosen": -2.9162228107452393, "logits/rejected": -2.9554502964019775, "logps/chosen": -417.62969970703125, "logps/rejected": -360.80120849609375, "loss": 0.9655, "rewards/accuracies": 0.5, "rewards/chosen": -0.8676888942718506, "rewards/margins": 1.8249750137329102, "rewards/rejected": -2.6926639080047607, "step": 7359 }, { "epoch": 0.85, "learning_rate": 4.614304108626946e-08, "logits/chosen": -3.9769082069396973, "logits/rejected": -3.8023223876953125, "logps/chosen": -180.43881225585938, "logps/rejected": -184.5065460205078, "loss": 0.2377, "rewards/accuracies": 0.875, "rewards/chosen": 0.223817840218544, "rewards/margins": 2.0683250427246094, "rewards/rejected": -1.8445074558258057, "step": 7360 }, { "epoch": 0.85, "learning_rate": 4.61079246166452e-08, "logits/chosen": -2.9157416820526123, "logits/rejected": -2.7604269981384277, "logps/chosen": -425.509521484375, "logps/rejected": -443.06561279296875, "loss": 0.2839, "rewards/accuracies": 0.875, "rewards/chosen": -0.4441467225551605, "rewards/margins": 2.7436764240264893, "rewards/rejected": -3.1878232955932617, "step": 7361 }, { "epoch": 0.85, "learning_rate": 4.6072808147020955e-08, "logits/chosen": -3.153310537338257, "logits/rejected": -3.0169992446899414, "logps/chosen": -336.7825622558594, "logps/rejected": -254.8670196533203, "loss": 0.2862, "rewards/accuracies": 1.0, "rewards/chosen": 0.43875330686569214, "rewards/margins": 1.9437536001205444, "rewards/rejected": -1.505000352859497, "step": 7362 }, { "epoch": 0.85, "learning_rate": 4.6037691677396695e-08, "logits/chosen": -3.0551371574401855, "logits/rejected": -3.0391879081726074, "logps/chosen": -361.33734130859375, "logps/rejected": -315.07598876953125, "loss": 0.1707, "rewards/accuracies": 1.0, "rewards/chosen": -0.07322511076927185, "rewards/margins": 2.002021074295044, "rewards/rejected": -2.0752463340759277, "step": 7363 }, { "epoch": 0.85, "learning_rate": 4.600257520777244e-08, "logits/chosen": -3.6549954414367676, "logits/rejected": -3.5781373977661133, "logps/chosen": -207.0986785888672, "logps/rejected": -173.01397705078125, "loss": 0.3797, "rewards/accuracies": 0.875, "rewards/chosen": -0.3333570063114166, "rewards/margins": 2.0400099754333496, "rewards/rejected": -2.3733668327331543, "step": 7364 }, { "epoch": 0.85, "learning_rate": 4.596745873814819e-08, "logits/chosen": -2.390343189239502, "logits/rejected": -2.6015326976776123, "logps/chosen": -340.1717529296875, "logps/rejected": -299.9361572265625, "loss": 0.3007, "rewards/accuracies": 0.875, "rewards/chosen": -0.4012141227722168, "rewards/margins": 1.6989376544952393, "rewards/rejected": -2.100151777267456, "step": 7365 }, { "epoch": 0.85, "learning_rate": 4.593234226852394e-08, "logits/chosen": -3.4621875286102295, "logits/rejected": -3.280602216720581, "logps/chosen": -296.4013671875, "logps/rejected": -230.3800506591797, "loss": 0.2827, "rewards/accuracies": 0.875, "rewards/chosen": 0.6775476932525635, "rewards/margins": 1.8550424575805664, "rewards/rejected": -1.177494764328003, "step": 7366 }, { "epoch": 0.85, "learning_rate": 4.589722579889968e-08, "logits/chosen": -3.122526168823242, "logits/rejected": -3.040761709213257, "logps/chosen": -190.0733184814453, "logps/rejected": -252.55596923828125, "loss": 0.4569, "rewards/accuracies": 0.75, "rewards/chosen": -0.09075909852981567, "rewards/margins": 1.0809311866760254, "rewards/rejected": -1.1716903448104858, "step": 7367 }, { "epoch": 0.85, "learning_rate": 4.586210932927543e-08, "logits/chosen": -3.335434675216675, "logits/rejected": -3.025513172149658, "logps/chosen": -303.6552734375, "logps/rejected": -215.19613647460938, "loss": 0.5531, "rewards/accuracies": 0.875, "rewards/chosen": 0.07645189762115479, "rewards/margins": 1.9050986766815186, "rewards/rejected": -1.8286468982696533, "step": 7368 }, { "epoch": 0.85, "learning_rate": 4.582699285965117e-08, "logits/chosen": -3.5892951488494873, "logits/rejected": -3.3400070667266846, "logps/chosen": -153.0694580078125, "logps/rejected": -196.88465881347656, "loss": 0.4944, "rewards/accuracies": 0.75, "rewards/chosen": -0.17493481934070587, "rewards/margins": 1.3493777513504028, "rewards/rejected": -1.5243126153945923, "step": 7369 }, { "epoch": 0.85, "learning_rate": 4.5791876390026926e-08, "logits/chosen": -2.626343250274658, "logits/rejected": -3.0521528720855713, "logps/chosen": -166.386962890625, "logps/rejected": -288.65350341796875, "loss": 0.4311, "rewards/accuracies": 0.75, "rewards/chosen": 0.691758930683136, "rewards/margins": 2.1181459426879883, "rewards/rejected": -1.426387071609497, "step": 7370 }, { "epoch": 0.85, "learning_rate": 4.5756759920402667e-08, "logits/chosen": -3.447619676589966, "logits/rejected": -3.3275046348571777, "logps/chosen": -333.9166259765625, "logps/rejected": -212.23712158203125, "loss": 0.4619, "rewards/accuracies": 0.75, "rewards/chosen": -0.67537522315979, "rewards/margins": 1.3704421520233154, "rewards/rejected": -2.0458173751831055, "step": 7371 }, { "epoch": 0.85, "learning_rate": 4.5721643450778414e-08, "logits/chosen": -3.1989216804504395, "logits/rejected": -2.735759973526001, "logps/chosen": -207.4794921875, "logps/rejected": -224.9835205078125, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": 0.5257019400596619, "rewards/margins": 2.663910388946533, "rewards/rejected": -2.1382083892822266, "step": 7372 }, { "epoch": 0.85, "learning_rate": 4.568652698115416e-08, "logits/chosen": -3.432699680328369, "logits/rejected": -3.2786669731140137, "logps/chosen": -127.67857360839844, "logps/rejected": -164.77244567871094, "loss": 0.282, "rewards/accuracies": 1.0, "rewards/chosen": 0.25780928134918213, "rewards/margins": 2.0598089694976807, "rewards/rejected": -1.8019996881484985, "step": 7373 }, { "epoch": 0.85, "learning_rate": 4.565141051152991e-08, "logits/chosen": -3.2788288593292236, "logits/rejected": -3.306389808654785, "logps/chosen": -225.0753173828125, "logps/rejected": -337.6608581542969, "loss": 0.247, "rewards/accuracies": 0.875, "rewards/chosen": -0.08364330977201462, "rewards/margins": 2.512240171432495, "rewards/rejected": -2.59588360786438, "step": 7374 }, { "epoch": 0.85, "learning_rate": 4.561629404190565e-08, "logits/chosen": -2.615952730178833, "logits/rejected": -3.077463150024414, "logps/chosen": -159.70187377929688, "logps/rejected": -324.16229248046875, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": 0.12438494712114334, "rewards/margins": 4.684408187866211, "rewards/rejected": -4.560022830963135, "step": 7375 }, { "epoch": 0.85, "learning_rate": 4.55811775722814e-08, "logits/chosen": -3.578324556350708, "logits/rejected": -3.5736541748046875, "logps/chosen": -246.40606689453125, "logps/rejected": -295.82696533203125, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -0.25619590282440186, "rewards/margins": 3.3249778747558594, "rewards/rejected": -3.5811736583709717, "step": 7376 }, { "epoch": 0.85, "learning_rate": 4.5546061102657143e-08, "logits/chosen": -3.2314066886901855, "logits/rejected": -3.0887951850891113, "logps/chosen": -214.36293029785156, "logps/rejected": -292.63922119140625, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": -0.06424164772033691, "rewards/margins": 2.1364798545837402, "rewards/rejected": -2.2007217407226562, "step": 7377 }, { "epoch": 0.85, "learning_rate": 4.55109446330329e-08, "logits/chosen": -3.5547783374786377, "logits/rejected": -3.8678107261657715, "logps/chosen": -264.2967224121094, "logps/rejected": -400.1763610839844, "loss": 0.6633, "rewards/accuracies": 0.625, "rewards/chosen": -0.24900075793266296, "rewards/margins": 0.6543028950691223, "rewards/rejected": -0.9033036231994629, "step": 7378 }, { "epoch": 0.85, "learning_rate": 4.547582816340864e-08, "logits/chosen": -3.590613603591919, "logits/rejected": -3.8312416076660156, "logps/chosen": -112.12344360351562, "logps/rejected": -234.7897186279297, "loss": 0.2583, "rewards/accuracies": 0.875, "rewards/chosen": 0.6900643706321716, "rewards/margins": 3.0393528938293457, "rewards/rejected": -2.3492884635925293, "step": 7379 }, { "epoch": 0.85, "learning_rate": 4.5440711693784385e-08, "logits/chosen": -3.241446018218994, "logits/rejected": -3.127260208129883, "logps/chosen": -318.18878173828125, "logps/rejected": -264.37451171875, "loss": 0.1985, "rewards/accuracies": 1.0, "rewards/chosen": 0.24911613762378693, "rewards/margins": 2.3555798530578613, "rewards/rejected": -2.10646390914917, "step": 7380 }, { "epoch": 0.85, "learning_rate": 4.5405595224160126e-08, "logits/chosen": -2.711111545562744, "logits/rejected": -2.7783877849578857, "logps/chosen": -282.016845703125, "logps/rejected": -269.02197265625, "loss": 0.2801, "rewards/accuracies": 1.0, "rewards/chosen": 0.32066312432289124, "rewards/margins": 2.2598624229431152, "rewards/rejected": -1.9391992092132568, "step": 7381 }, { "epoch": 0.85, "learning_rate": 4.537047875453588e-08, "logits/chosen": -2.589491844177246, "logits/rejected": -2.4451112747192383, "logps/chosen": -215.72955322265625, "logps/rejected": -361.7466735839844, "loss": 0.3281, "rewards/accuracies": 0.75, "rewards/chosen": -0.0880458801984787, "rewards/margins": 1.8985483646392822, "rewards/rejected": -1.9865942001342773, "step": 7382 }, { "epoch": 0.85, "learning_rate": 4.533536228491162e-08, "logits/chosen": -2.672924518585205, "logits/rejected": -2.4827873706817627, "logps/chosen": -209.29473876953125, "logps/rejected": -381.988037109375, "loss": 0.2951, "rewards/accuracies": 0.875, "rewards/chosen": -0.5538262128829956, "rewards/margins": 2.0753672122955322, "rewards/rejected": -2.6291933059692383, "step": 7383 }, { "epoch": 0.85, "learning_rate": 4.5300245815287374e-08, "logits/chosen": -3.3280417919158936, "logits/rejected": -2.8145813941955566, "logps/chosen": -485.22564697265625, "logps/rejected": -286.356201171875, "loss": 0.2291, "rewards/accuracies": 0.875, "rewards/chosen": 0.36938369274139404, "rewards/margins": 2.40694522857666, "rewards/rejected": -2.0375614166259766, "step": 7384 }, { "epoch": 0.85, "learning_rate": 4.5265129345663115e-08, "logits/chosen": -3.1267619132995605, "logits/rejected": -3.0390665531158447, "logps/chosen": -236.42581176757812, "logps/rejected": -399.0465087890625, "loss": 0.2059, "rewards/accuracies": 1.0, "rewards/chosen": -0.20961330831050873, "rewards/margins": 2.5146102905273438, "rewards/rejected": -2.7242238521575928, "step": 7385 }, { "epoch": 0.85, "learning_rate": 4.5230012876038855e-08, "logits/chosen": -2.743500232696533, "logits/rejected": -2.599937915802002, "logps/chosen": -684.5462646484375, "logps/rejected": -273.0282897949219, "loss": 0.2849, "rewards/accuracies": 0.875, "rewards/chosen": -0.37285763025283813, "rewards/margins": 1.521550178527832, "rewards/rejected": -1.8944077491760254, "step": 7386 }, { "epoch": 0.85, "learning_rate": 4.519489640641461e-08, "logits/chosen": -2.5405452251434326, "logits/rejected": -2.197251081466675, "logps/chosen": -327.2315979003906, "logps/rejected": -294.61297607421875, "loss": 0.317, "rewards/accuracies": 0.75, "rewards/chosen": 0.13133585453033447, "rewards/margins": 2.1776015758514404, "rewards/rejected": -2.0462658405303955, "step": 7387 }, { "epoch": 0.85, "learning_rate": 4.515977993679035e-08, "logits/chosen": -3.529749870300293, "logits/rejected": -3.489616870880127, "logps/chosen": -293.01055908203125, "logps/rejected": -245.04010009765625, "loss": 0.1952, "rewards/accuracies": 1.0, "rewards/chosen": 0.21759814023971558, "rewards/margins": 2.4155449867248535, "rewards/rejected": -2.197946548461914, "step": 7388 }, { "epoch": 0.85, "learning_rate": 4.51246634671661e-08, "logits/chosen": -3.1082262992858887, "logits/rejected": -2.7512238025665283, "logps/chosen": -144.06973266601562, "logps/rejected": -167.82850646972656, "loss": 0.4601, "rewards/accuracies": 0.625, "rewards/chosen": -0.1762596070766449, "rewards/margins": 1.1656489372253418, "rewards/rejected": -1.3419084548950195, "step": 7389 }, { "epoch": 0.85, "learning_rate": 4.5089546997541844e-08, "logits/chosen": -2.5238072872161865, "logits/rejected": -2.6004087924957275, "logps/chosen": -284.48199462890625, "logps/rejected": -167.90647888183594, "loss": 0.5149, "rewards/accuracies": 0.625, "rewards/chosen": 0.38004207611083984, "rewards/margins": 1.6261969804763794, "rewards/rejected": -1.246155023574829, "step": 7390 }, { "epoch": 0.85, "learning_rate": 4.505443052791759e-08, "logits/chosen": -2.6539084911346436, "logits/rejected": -2.9682154655456543, "logps/chosen": -358.566162109375, "logps/rejected": -235.00042724609375, "loss": 0.9853, "rewards/accuracies": 0.625, "rewards/chosen": -0.748590350151062, "rewards/margins": 0.1703180968761444, "rewards/rejected": -0.9189084768295288, "step": 7391 }, { "epoch": 0.85, "learning_rate": 4.501931405829333e-08, "logits/chosen": -2.4079558849334717, "logits/rejected": -2.7242348194122314, "logps/chosen": -337.7750549316406, "logps/rejected": -307.7074890136719, "loss": 0.548, "rewards/accuracies": 0.75, "rewards/chosen": -0.22043690085411072, "rewards/margins": 1.6949173212051392, "rewards/rejected": -1.9153542518615723, "step": 7392 }, { "epoch": 0.85, "learning_rate": 4.4984197588669086e-08, "logits/chosen": -3.0514755249023438, "logits/rejected": -3.217343330383301, "logps/chosen": -255.41229248046875, "logps/rejected": -232.87998962402344, "loss": 0.4195, "rewards/accuracies": 0.625, "rewards/chosen": -0.725545346736908, "rewards/margins": 1.4105339050292969, "rewards/rejected": -2.1360793113708496, "step": 7393 }, { "epoch": 0.85, "learning_rate": 4.4949081119044826e-08, "logits/chosen": -2.8487937450408936, "logits/rejected": -3.308960437774658, "logps/chosen": -211.62986755371094, "logps/rejected": -400.3833923339844, "loss": 0.3996, "rewards/accuracies": 0.75, "rewards/chosen": -0.5617678761482239, "rewards/margins": 2.8462960720062256, "rewards/rejected": -3.4080638885498047, "step": 7394 }, { "epoch": 0.85, "learning_rate": 4.491396464942058e-08, "logits/chosen": -3.2647128105163574, "logits/rejected": -3.1874845027923584, "logps/chosen": -269.5652160644531, "logps/rejected": -176.9139404296875, "loss": 0.5296, "rewards/accuracies": 0.625, "rewards/chosen": -0.4171907305717468, "rewards/margins": 1.238340973854065, "rewards/rejected": -1.655531644821167, "step": 7395 }, { "epoch": 0.85, "learning_rate": 4.487884817979632e-08, "logits/chosen": -3.0367066860198975, "logits/rejected": -3.0103440284729004, "logps/chosen": -292.23028564453125, "logps/rejected": -309.94158935546875, "loss": 0.3348, "rewards/accuracies": 0.875, "rewards/chosen": 0.04001212120056152, "rewards/margins": 1.2863304615020752, "rewards/rejected": -1.2463182210922241, "step": 7396 }, { "epoch": 0.85, "learning_rate": 4.484373171017207e-08, "logits/chosen": -3.1643362045288086, "logits/rejected": -3.305773973464966, "logps/chosen": -204.40487670898438, "logps/rejected": -253.90499877929688, "loss": 0.2737, "rewards/accuracies": 0.875, "rewards/chosen": 0.1974877566099167, "rewards/margins": 2.6436800956726074, "rewards/rejected": -2.4461922645568848, "step": 7397 }, { "epoch": 0.85, "learning_rate": 4.480861524054781e-08, "logits/chosen": -2.712146759033203, "logits/rejected": -2.7704479694366455, "logps/chosen": -518.696533203125, "logps/rejected": -312.71044921875, "loss": 0.6575, "rewards/accuracies": 0.625, "rewards/chosen": -0.3837507963180542, "rewards/margins": 0.5743777751922607, "rewards/rejected": -0.9581286311149597, "step": 7398 }, { "epoch": 0.85, "learning_rate": 4.477349877092356e-08, "logits/chosen": -2.5693016052246094, "logits/rejected": -2.5571699142456055, "logps/chosen": -310.2842102050781, "logps/rejected": -345.7493896484375, "loss": 0.3753, "rewards/accuracies": 1.0, "rewards/chosen": -0.47799158096313477, "rewards/margins": 1.6927895545959473, "rewards/rejected": -2.170781135559082, "step": 7399 }, { "epoch": 0.85, "learning_rate": 4.47383823012993e-08, "logits/chosen": -2.4264109134674072, "logits/rejected": -2.09572172164917, "logps/chosen": -432.15863037109375, "logps/rejected": -356.7404479980469, "loss": 0.2741, "rewards/accuracies": 0.875, "rewards/chosen": -0.2651725709438324, "rewards/margins": 2.40944766998291, "rewards/rejected": -2.6746201515197754, "step": 7400 }, { "epoch": 0.85, "learning_rate": 4.470326583167506e-08, "logits/chosen": -2.9492177963256836, "logits/rejected": -3.3205671310424805, "logps/chosen": -197.17459106445312, "logps/rejected": -294.949951171875, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": -0.03179468959569931, "rewards/margins": 4.291082859039307, "rewards/rejected": -4.322877407073975, "step": 7401 }, { "epoch": 0.85, "learning_rate": 4.46681493620508e-08, "logits/chosen": -3.1109910011291504, "logits/rejected": -2.9824752807617188, "logps/chosen": -189.6608428955078, "logps/rejected": -201.69338989257812, "loss": 0.4808, "rewards/accuracies": 0.875, "rewards/chosen": -0.20206981897354126, "rewards/margins": 1.4606117010116577, "rewards/rejected": -1.6626814603805542, "step": 7402 }, { "epoch": 0.85, "learning_rate": 4.4633032892426545e-08, "logits/chosen": -2.262728214263916, "logits/rejected": -2.52956223487854, "logps/chosen": -267.68048095703125, "logps/rejected": -249.97149658203125, "loss": 0.4842, "rewards/accuracies": 0.75, "rewards/chosen": 0.39689165353775024, "rewards/margins": 1.348945140838623, "rewards/rejected": -0.952053427696228, "step": 7403 }, { "epoch": 0.85, "learning_rate": 4.459791642280229e-08, "logits/chosen": -3.336676597595215, "logits/rejected": -3.2782764434814453, "logps/chosen": -132.73280334472656, "logps/rejected": -177.72657775878906, "loss": 0.2869, "rewards/accuracies": 1.0, "rewards/chosen": 0.0846312940120697, "rewards/margins": 1.5497756004333496, "rewards/rejected": -1.465144395828247, "step": 7404 }, { "epoch": 0.85, "learning_rate": 4.456279995317804e-08, "logits/chosen": -2.343111038208008, "logits/rejected": -2.3094818592071533, "logps/chosen": -331.24420166015625, "logps/rejected": -218.10244750976562, "loss": 0.7811, "rewards/accuracies": 0.5, "rewards/chosen": -0.4097417891025543, "rewards/margins": 0.3874269425868988, "rewards/rejected": -0.7971687316894531, "step": 7405 }, { "epoch": 0.85, "learning_rate": 4.452768348355378e-08, "logits/chosen": -3.1395747661590576, "logits/rejected": -3.019679307937622, "logps/chosen": -318.7430419921875, "logps/rejected": -286.90472412109375, "loss": 0.2352, "rewards/accuracies": 0.875, "rewards/chosen": 0.20777207612991333, "rewards/margins": 2.0578627586364746, "rewards/rejected": -1.850090742111206, "step": 7406 }, { "epoch": 0.85, "learning_rate": 4.4492567013929534e-08, "logits/chosen": -3.555464267730713, "logits/rejected": -3.4460277557373047, "logps/chosen": -232.82144165039062, "logps/rejected": -372.8388671875, "loss": 0.1843, "rewards/accuracies": 1.0, "rewards/chosen": 0.0011692345142364502, "rewards/margins": 2.610886335372925, "rewards/rejected": -2.60971736907959, "step": 7407 }, { "epoch": 0.85, "learning_rate": 4.4457450544305275e-08, "logits/chosen": -2.878420352935791, "logits/rejected": -2.613579750061035, "logps/chosen": -206.58566284179688, "logps/rejected": -177.5654296875, "loss": 0.5588, "rewards/accuracies": 0.625, "rewards/chosen": -0.05352276563644409, "rewards/margins": 0.4265248775482178, "rewards/rejected": -0.48004770278930664, "step": 7408 }, { "epoch": 0.85, "learning_rate": 4.442233407468103e-08, "logits/chosen": -2.9768404960632324, "logits/rejected": -2.965655565261841, "logps/chosen": -176.04010009765625, "logps/rejected": -515.2606201171875, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": 0.06351907551288605, "rewards/margins": 3.183511257171631, "rewards/rejected": -3.119992256164551, "step": 7409 }, { "epoch": 0.85, "learning_rate": 4.438721760505677e-08, "logits/chosen": -2.86220121383667, "logits/rejected": -2.861907958984375, "logps/chosen": -242.9217987060547, "logps/rejected": -194.4593505859375, "loss": 0.3899, "rewards/accuracies": 0.625, "rewards/chosen": -0.2799178957939148, "rewards/margins": 1.1459925174713135, "rewards/rejected": -1.425910234451294, "step": 7410 }, { "epoch": 0.85, "learning_rate": 4.4352101135432516e-08, "logits/chosen": -3.199709415435791, "logits/rejected": -3.2326958179473877, "logps/chosen": -418.71923828125, "logps/rejected": -317.1479797363281, "loss": 0.2341, "rewards/accuracies": 0.875, "rewards/chosen": 0.6093025207519531, "rewards/margins": 2.0537467002868652, "rewards/rejected": -1.444444179534912, "step": 7411 }, { "epoch": 0.85, "learning_rate": 4.4316984665808264e-08, "logits/chosen": -3.2416892051696777, "logits/rejected": -2.8967254161834717, "logps/chosen": -304.15472412109375, "logps/rejected": -192.35365295410156, "loss": 0.4707, "rewards/accuracies": 0.75, "rewards/chosen": -0.37001144886016846, "rewards/margins": 1.7227500677108765, "rewards/rejected": -2.092761516571045, "step": 7412 }, { "epoch": 0.85, "learning_rate": 4.428186819618401e-08, "logits/chosen": -2.635300636291504, "logits/rejected": -2.6542248725891113, "logps/chosen": -245.8482666015625, "logps/rejected": -227.01280212402344, "loss": 0.439, "rewards/accuracies": 0.875, "rewards/chosen": 0.4488365650177002, "rewards/margins": 1.2716432809829712, "rewards/rejected": -0.8228066563606262, "step": 7413 }, { "epoch": 0.85, "learning_rate": 4.424675172655975e-08, "logits/chosen": -3.0005171298980713, "logits/rejected": -2.995628595352173, "logps/chosen": -381.68048095703125, "logps/rejected": -245.37933349609375, "loss": 0.208, "rewards/accuracies": 1.0, "rewards/chosen": 0.4776992201805115, "rewards/margins": 2.2707571983337402, "rewards/rejected": -1.793057918548584, "step": 7414 }, { "epoch": 0.85, "learning_rate": 4.4211635256935505e-08, "logits/chosen": -2.3998003005981445, "logits/rejected": -2.5123462677001953, "logps/chosen": -184.46530151367188, "logps/rejected": -145.26663208007812, "loss": 0.8506, "rewards/accuracies": 0.5, "rewards/chosen": -0.435015469789505, "rewards/margins": -0.1642000377178192, "rewards/rejected": -0.2708154320716858, "step": 7415 }, { "epoch": 0.85, "learning_rate": 4.4176518787311246e-08, "logits/chosen": -3.207338809967041, "logits/rejected": -3.021148681640625, "logps/chosen": -328.3614501953125, "logps/rejected": -269.80859375, "loss": 0.4364, "rewards/accuracies": 0.875, "rewards/chosen": -0.4673064947128296, "rewards/margins": 1.484609842300415, "rewards/rejected": -1.951916217803955, "step": 7416 }, { "epoch": 0.86, "learning_rate": 4.414140231768699e-08, "logits/chosen": -3.6868019104003906, "logits/rejected": -3.7610180377960205, "logps/chosen": -142.55615234375, "logps/rejected": -175.34609985351562, "loss": 0.2302, "rewards/accuracies": 0.875, "rewards/chosen": 0.27270764112472534, "rewards/margins": 2.208280086517334, "rewards/rejected": -1.9355722665786743, "step": 7417 }, { "epoch": 0.86, "learning_rate": 4.410628584806274e-08, "logits/chosen": -3.2152209281921387, "logits/rejected": -3.026519775390625, "logps/chosen": -315.69024658203125, "logps/rejected": -258.5008544921875, "loss": 0.392, "rewards/accuracies": 0.875, "rewards/chosen": -0.05569286644458771, "rewards/margins": 1.4244757890701294, "rewards/rejected": -1.4801685810089111, "step": 7418 }, { "epoch": 0.86, "learning_rate": 4.407116937843849e-08, "logits/chosen": -3.266249179840088, "logits/rejected": -3.2555222511291504, "logps/chosen": -241.1493682861328, "logps/rejected": -216.04225158691406, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": 0.8808154463768005, "rewards/margins": 2.6715993881225586, "rewards/rejected": -1.7907840013504028, "step": 7419 }, { "epoch": 0.86, "learning_rate": 4.403605290881423e-08, "logits/chosen": -3.2370264530181885, "logits/rejected": -3.2843141555786133, "logps/chosen": -108.50077819824219, "logps/rejected": -204.26417541503906, "loss": 0.5156, "rewards/accuracies": 0.625, "rewards/chosen": -0.2773430049419403, "rewards/margins": 1.2045574188232422, "rewards/rejected": -1.4819004535675049, "step": 7420 }, { "epoch": 0.86, "learning_rate": 4.400093643918998e-08, "logits/chosen": -3.469696283340454, "logits/rejected": -3.1000804901123047, "logps/chosen": -314.03216552734375, "logps/rejected": -293.0509948730469, "loss": 0.2632, "rewards/accuracies": 0.875, "rewards/chosen": 0.01044502854347229, "rewards/margins": 1.960181713104248, "rewards/rejected": -1.9497368335723877, "step": 7421 }, { "epoch": 0.86, "learning_rate": 4.396581996956572e-08, "logits/chosen": -3.441765308380127, "logits/rejected": -3.424752712249756, "logps/chosen": -208.7914581298828, "logps/rejected": -191.16683959960938, "loss": 0.401, "rewards/accuracies": 0.75, "rewards/chosen": -0.8385730981826782, "rewards/margins": 1.5056648254394531, "rewards/rejected": -2.344237804412842, "step": 7422 }, { "epoch": 0.86, "learning_rate": 4.3930703499941477e-08, "logits/chosen": -2.813591718673706, "logits/rejected": -2.729159116744995, "logps/chosen": -249.24620056152344, "logps/rejected": -151.74623107910156, "loss": 0.1869, "rewards/accuracies": 1.0, "rewards/chosen": 0.04918160289525986, "rewards/margins": 1.787903904914856, "rewards/rejected": -1.738722324371338, "step": 7423 }, { "epoch": 0.86, "learning_rate": 4.389558703031722e-08, "logits/chosen": -3.12668514251709, "logits/rejected": -3.4513092041015625, "logps/chosen": -693.62646484375, "logps/rejected": -389.7574462890625, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -0.14287053048610687, "rewards/margins": 4.53176212310791, "rewards/rejected": -4.674632549285889, "step": 7424 }, { "epoch": 0.86, "learning_rate": 4.3860470560692964e-08, "logits/chosen": -2.908275842666626, "logits/rejected": -2.8644351959228516, "logps/chosen": -441.10308837890625, "logps/rejected": -255.0076446533203, "loss": 0.9223, "rewards/accuracies": 0.5, "rewards/chosen": -0.7453814148902893, "rewards/margins": 0.5440059900283813, "rewards/rejected": -1.2893874645233154, "step": 7425 }, { "epoch": 0.86, "learning_rate": 4.382535409106871e-08, "logits/chosen": -2.8634865283966064, "logits/rejected": -2.9170637130737305, "logps/chosen": -315.28106689453125, "logps/rejected": -325.6986083984375, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": -0.06391158699989319, "rewards/margins": 1.8343620300292969, "rewards/rejected": -1.8982734680175781, "step": 7426 }, { "epoch": 0.86, "learning_rate": 4.379023762144446e-08, "logits/chosen": -3.280503273010254, "logits/rejected": -3.394077777862549, "logps/chosen": -370.7622375488281, "logps/rejected": -373.8385925292969, "loss": 0.3761, "rewards/accuracies": 0.875, "rewards/chosen": 0.3408520221710205, "rewards/margins": 2.0996592044830322, "rewards/rejected": -1.7588070631027222, "step": 7427 }, { "epoch": 0.86, "learning_rate": 4.37551211518202e-08, "logits/chosen": -3.0971603393554688, "logits/rejected": -2.995231866836548, "logps/chosen": -270.17974853515625, "logps/rejected": -250.91070556640625, "loss": 0.4489, "rewards/accuracies": 0.625, "rewards/chosen": -0.18312904238700867, "rewards/margins": 2.2842912673950195, "rewards/rejected": -2.4674201011657715, "step": 7428 }, { "epoch": 0.86, "learning_rate": 4.3720004682195953e-08, "logits/chosen": -2.7729389667510986, "logits/rejected": -2.5030312538146973, "logps/chosen": -257.87652587890625, "logps/rejected": -470.9156494140625, "loss": 0.8878, "rewards/accuracies": 0.625, "rewards/chosen": -0.16276590526103973, "rewards/margins": 1.0994665622711182, "rewards/rejected": -1.2622325420379639, "step": 7429 }, { "epoch": 0.86, "learning_rate": 4.3684888212571694e-08, "logits/chosen": -3.775862216949463, "logits/rejected": -3.2605857849121094, "logps/chosen": -539.2446899414062, "logps/rejected": -240.41888427734375, "loss": 0.2115, "rewards/accuracies": 0.875, "rewards/chosen": -0.41220584511756897, "rewards/margins": 2.4417471885681152, "rewards/rejected": -2.8539528846740723, "step": 7430 }, { "epoch": 0.86, "learning_rate": 4.364977174294744e-08, "logits/chosen": -2.8562934398651123, "logits/rejected": -3.0854148864746094, "logps/chosen": -360.49859619140625, "logps/rejected": -357.86651611328125, "loss": 0.6366, "rewards/accuracies": 0.625, "rewards/chosen": -0.6102811694145203, "rewards/margins": 0.2912939190864563, "rewards/rejected": -0.9015750885009766, "step": 7431 }, { "epoch": 0.86, "learning_rate": 4.361465527332319e-08, "logits/chosen": -3.2375054359436035, "logits/rejected": -3.2511098384857178, "logps/chosen": -119.41746520996094, "logps/rejected": -250.4125518798828, "loss": 0.3508, "rewards/accuracies": 0.875, "rewards/chosen": -0.06495543569326401, "rewards/margins": 2.3460142612457275, "rewards/rejected": -2.4109697341918945, "step": 7432 }, { "epoch": 0.86, "learning_rate": 4.3579538803698936e-08, "logits/chosen": -2.597503662109375, "logits/rejected": -2.5739548206329346, "logps/chosen": -166.81658935546875, "logps/rejected": -232.29730224609375, "loss": 0.6304, "rewards/accuracies": 0.625, "rewards/chosen": -0.2544320821762085, "rewards/margins": 0.6507646441459656, "rewards/rejected": -0.9051968455314636, "step": 7433 }, { "epoch": 0.86, "learning_rate": 4.3544422334074676e-08, "logits/chosen": -3.5229570865631104, "logits/rejected": -3.1696155071258545, "logps/chosen": -410.3159484863281, "logps/rejected": -251.21609497070312, "loss": 0.3764, "rewards/accuracies": 0.875, "rewards/chosen": -0.15014207363128662, "rewards/margins": 1.4456764459609985, "rewards/rejected": -1.5958185195922852, "step": 7434 }, { "epoch": 0.86, "learning_rate": 4.3509305864450424e-08, "logits/chosen": -2.6682639122009277, "logits/rejected": -2.5484607219696045, "logps/chosen": -256.0222473144531, "logps/rejected": -241.33807373046875, "loss": 0.2605, "rewards/accuracies": 1.0, "rewards/chosen": -0.09968234598636627, "rewards/margins": 1.6386700868606567, "rewards/rejected": -1.7383522987365723, "step": 7435 }, { "epoch": 0.86, "learning_rate": 4.347418939482617e-08, "logits/chosen": -2.960486888885498, "logits/rejected": -3.165278196334839, "logps/chosen": -122.10794830322266, "logps/rejected": -161.94488525390625, "loss": 0.3525, "rewards/accuracies": 0.75, "rewards/chosen": -0.6823269128799438, "rewards/margins": 1.7886614799499512, "rewards/rejected": -2.4709885120391846, "step": 7436 }, { "epoch": 0.86, "learning_rate": 4.343907292520191e-08, "logits/chosen": -2.917811393737793, "logits/rejected": -2.833866596221924, "logps/chosen": -280.2016906738281, "logps/rejected": -369.5550537109375, "loss": 0.6243, "rewards/accuracies": 0.75, "rewards/chosen": -0.44674086570739746, "rewards/margins": 0.7635760307312012, "rewards/rejected": -1.2103168964385986, "step": 7437 }, { "epoch": 0.86, "learning_rate": 4.3403956455577665e-08, "logits/chosen": -2.701106071472168, "logits/rejected": -2.6974923610687256, "logps/chosen": -159.37461853027344, "logps/rejected": -319.3586120605469, "loss": 0.5298, "rewards/accuracies": 0.625, "rewards/chosen": 0.40425658226013184, "rewards/margins": 1.308295726776123, "rewards/rejected": -0.904039204120636, "step": 7438 }, { "epoch": 0.86, "learning_rate": 4.3368839985953406e-08, "logits/chosen": -3.196037530899048, "logits/rejected": -3.5752949714660645, "logps/chosen": -233.58392333984375, "logps/rejected": -207.5394744873047, "loss": 0.5965, "rewards/accuracies": 0.625, "rewards/chosen": -0.36246296763420105, "rewards/margins": 1.1036020517349243, "rewards/rejected": -1.4660649299621582, "step": 7439 }, { "epoch": 0.86, "learning_rate": 4.333372351632916e-08, "logits/chosen": -3.2297093868255615, "logits/rejected": -3.354759693145752, "logps/chosen": -163.96890258789062, "logps/rejected": -265.93804931640625, "loss": 0.5298, "rewards/accuracies": 0.75, "rewards/chosen": -0.2608449459075928, "rewards/margins": 1.3344228267669678, "rewards/rejected": -1.595267653465271, "step": 7440 }, { "epoch": 0.86, "learning_rate": 4.32986070467049e-08, "logits/chosen": -3.4136037826538086, "logits/rejected": -3.527000904083252, "logps/chosen": -275.0611877441406, "logps/rejected": -514.3787231445312, "loss": 0.3599, "rewards/accuracies": 0.75, "rewards/chosen": -0.10396989434957504, "rewards/margins": 2.384913444519043, "rewards/rejected": -2.4888834953308105, "step": 7441 }, { "epoch": 0.86, "learning_rate": 4.326349057708065e-08, "logits/chosen": -3.108804702758789, "logits/rejected": -3.1495018005371094, "logps/chosen": -327.5858459472656, "logps/rejected": -328.162353515625, "loss": 0.4506, "rewards/accuracies": 0.75, "rewards/chosen": -0.05809244513511658, "rewards/margins": 1.346487045288086, "rewards/rejected": -1.4045796394348145, "step": 7442 }, { "epoch": 0.86, "learning_rate": 4.3228374107456395e-08, "logits/chosen": -3.224019765853882, "logits/rejected": -3.375354766845703, "logps/chosen": -136.7716064453125, "logps/rejected": -168.18142700195312, "loss": 0.3838, "rewards/accuracies": 0.875, "rewards/chosen": 0.38671931624412537, "rewards/margins": 2.322164535522461, "rewards/rejected": -1.9354451894760132, "step": 7443 }, { "epoch": 0.86, "learning_rate": 4.319325763783214e-08, "logits/chosen": -2.3665990829467773, "logits/rejected": -2.3921420574188232, "logps/chosen": -442.6899719238281, "logps/rejected": -412.4638977050781, "loss": 0.2839, "rewards/accuracies": 1.0, "rewards/chosen": 0.2534828186035156, "rewards/margins": 2.2681589126586914, "rewards/rejected": -2.014676094055176, "step": 7444 }, { "epoch": 0.86, "learning_rate": 4.315814116820788e-08, "logits/chosen": -3.2274246215820312, "logits/rejected": -3.620516777038574, "logps/chosen": -283.6339111328125, "logps/rejected": -267.26873779296875, "loss": 0.2167, "rewards/accuracies": 0.875, "rewards/chosen": 0.06238704174757004, "rewards/margins": 3.1818408966064453, "rewards/rejected": -3.1194536685943604, "step": 7445 }, { "epoch": 0.86, "learning_rate": 4.3123024698583637e-08, "logits/chosen": -3.584656238555908, "logits/rejected": -3.330528736114502, "logps/chosen": -118.53046417236328, "logps/rejected": -90.88018798828125, "loss": 0.7956, "rewards/accuracies": 0.625, "rewards/chosen": -0.23851390182971954, "rewards/margins": 0.48337507247924805, "rewards/rejected": -0.7218888998031616, "step": 7446 }, { "epoch": 0.86, "learning_rate": 4.308790822895938e-08, "logits/chosen": -2.9882686138153076, "logits/rejected": -3.0370326042175293, "logps/chosen": -213.92327880859375, "logps/rejected": -206.5742645263672, "loss": 0.7907, "rewards/accuracies": 0.375, "rewards/chosen": 0.17497240006923676, "rewards/margins": 0.7127736806869507, "rewards/rejected": -0.537801206111908, "step": 7447 }, { "epoch": 0.86, "learning_rate": 4.3052791759335124e-08, "logits/chosen": -2.874072551727295, "logits/rejected": -2.9871110916137695, "logps/chosen": -287.23486328125, "logps/rejected": -264.3133544921875, "loss": 0.3816, "rewards/accuracies": 0.75, "rewards/chosen": 0.05959977209568024, "rewards/margins": 1.2330803871154785, "rewards/rejected": -1.1734806299209595, "step": 7448 }, { "epoch": 0.86, "learning_rate": 4.301767528971087e-08, "logits/chosen": -3.453993797302246, "logits/rejected": -3.5075936317443848, "logps/chosen": -384.8571472167969, "logps/rejected": -300.44427490234375, "loss": 0.3232, "rewards/accuracies": 0.875, "rewards/chosen": -0.30325180292129517, "rewards/margins": 2.3086206912994385, "rewards/rejected": -2.611872434616089, "step": 7449 }, { "epoch": 0.86, "learning_rate": 4.298255882008662e-08, "logits/chosen": -3.5823540687561035, "logits/rejected": -3.8320541381835938, "logps/chosen": -183.92926025390625, "logps/rejected": -242.208251953125, "loss": 0.2209, "rewards/accuracies": 1.0, "rewards/chosen": -0.007891088724136353, "rewards/margins": 2.080946683883667, "rewards/rejected": -2.0888378620147705, "step": 7450 }, { "epoch": 0.86, "learning_rate": 4.294744235046236e-08, "logits/chosen": -2.886143684387207, "logits/rejected": -2.966383695602417, "logps/chosen": -327.53594970703125, "logps/rejected": -299.966552734375, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.11361183971166611, "rewards/margins": 1.9461798667907715, "rewards/rejected": -2.0597920417785645, "step": 7451 }, { "epoch": 0.86, "learning_rate": 4.2912325880838113e-08, "logits/chosen": -3.3536858558654785, "logits/rejected": -3.3786818981170654, "logps/chosen": -287.83367919921875, "logps/rejected": -240.60699462890625, "loss": 0.2803, "rewards/accuracies": 1.0, "rewards/chosen": -0.033266451209783554, "rewards/margins": 1.7147939205169678, "rewards/rejected": -1.7480604648590088, "step": 7452 }, { "epoch": 0.86, "learning_rate": 4.2877209411213854e-08, "logits/chosen": -2.844278573989868, "logits/rejected": -2.793128490447998, "logps/chosen": -119.99345397949219, "logps/rejected": -410.7127685546875, "loss": 0.5345, "rewards/accuracies": 0.75, "rewards/chosen": -1.4307541847229004, "rewards/margins": 1.6598570346832275, "rewards/rejected": -3.090610980987549, "step": 7453 }, { "epoch": 0.86, "learning_rate": 4.284209294158961e-08, "logits/chosen": -3.179813861846924, "logits/rejected": -3.3362083435058594, "logps/chosen": -204.34750366210938, "logps/rejected": -345.2425231933594, "loss": 0.4562, "rewards/accuracies": 0.625, "rewards/chosen": 0.10708782076835632, "rewards/margins": 3.1033618450164795, "rewards/rejected": -2.99627423286438, "step": 7454 }, { "epoch": 0.86, "learning_rate": 4.280697647196535e-08, "logits/chosen": -3.684847354888916, "logits/rejected": -3.5177319049835205, "logps/chosen": -177.5576629638672, "logps/rejected": -160.0171661376953, "loss": 0.2672, "rewards/accuracies": 1.0, "rewards/chosen": -0.15750280022621155, "rewards/margins": 1.802829623222351, "rewards/rejected": -1.9603323936462402, "step": 7455 }, { "epoch": 0.86, "learning_rate": 4.2771860002341096e-08, "logits/chosen": -3.132546901702881, "logits/rejected": -2.879006862640381, "logps/chosen": -275.4530944824219, "logps/rejected": -275.2625427246094, "loss": 0.3477, "rewards/accuracies": 0.75, "rewards/chosen": 0.46610745787620544, "rewards/margins": 1.7877979278564453, "rewards/rejected": -1.321690320968628, "step": 7456 }, { "epoch": 0.86, "learning_rate": 4.273674353271684e-08, "logits/chosen": -2.4907331466674805, "logits/rejected": -2.356266498565674, "logps/chosen": -304.0224914550781, "logps/rejected": -284.18243408203125, "loss": 0.7237, "rewards/accuracies": 0.75, "rewards/chosen": 0.10557231307029724, "rewards/margins": 1.4116867780685425, "rewards/rejected": -1.3061144351959229, "step": 7457 }, { "epoch": 0.86, "learning_rate": 4.270162706309259e-08, "logits/chosen": -2.6891605854034424, "logits/rejected": -3.0345568656921387, "logps/chosen": -271.20416259765625, "logps/rejected": -241.65138244628906, "loss": 0.3497, "rewards/accuracies": 0.875, "rewards/chosen": -0.5012956261634827, "rewards/margins": 1.2683813571929932, "rewards/rejected": -1.7696770429611206, "step": 7458 }, { "epoch": 0.86, "learning_rate": 4.266651059346833e-08, "logits/chosen": -3.034236431121826, "logits/rejected": -2.9846014976501465, "logps/chosen": -338.949951171875, "logps/rejected": -300.1862487792969, "loss": 0.2965, "rewards/accuracies": 0.875, "rewards/chosen": -0.2846730947494507, "rewards/margins": 2.5205001831054688, "rewards/rejected": -2.80517315864563, "step": 7459 }, { "epoch": 0.86, "learning_rate": 4.2631394123844085e-08, "logits/chosen": -3.2985048294067383, "logits/rejected": -3.3573484420776367, "logps/chosen": -347.2588806152344, "logps/rejected": -239.73162841796875, "loss": 0.1661, "rewards/accuracies": 1.0, "rewards/chosen": -0.32636868953704834, "rewards/margins": 3.2175326347351074, "rewards/rejected": -3.5439016819000244, "step": 7460 }, { "epoch": 0.86, "learning_rate": 4.2596277654219825e-08, "logits/chosen": -3.7688989639282227, "logits/rejected": -3.655292510986328, "logps/chosen": -335.7353820800781, "logps/rejected": -334.39312744140625, "loss": 0.5587, "rewards/accuracies": 0.75, "rewards/chosen": -0.6973587870597839, "rewards/margins": 1.3293330669403076, "rewards/rejected": -2.0266916751861572, "step": 7461 }, { "epoch": 0.86, "learning_rate": 4.256116118459558e-08, "logits/chosen": -2.362692356109619, "logits/rejected": -2.668851375579834, "logps/chosen": -341.8357849121094, "logps/rejected": -315.2247619628906, "loss": 1.1995, "rewards/accuracies": 0.625, "rewards/chosen": -0.8725191950798035, "rewards/margins": 0.3874571919441223, "rewards/rejected": -1.2599763870239258, "step": 7462 }, { "epoch": 0.86, "learning_rate": 4.252604471497132e-08, "logits/chosen": -2.983443021774292, "logits/rejected": -2.673900842666626, "logps/chosen": -276.78863525390625, "logps/rejected": -220.83103942871094, "loss": 0.3586, "rewards/accuracies": 0.875, "rewards/chosen": 0.2632012963294983, "rewards/margins": 1.620740532875061, "rewards/rejected": -1.357539176940918, "step": 7463 }, { "epoch": 0.86, "learning_rate": 4.249092824534707e-08, "logits/chosen": -3.424574375152588, "logits/rejected": -3.274334192276001, "logps/chosen": -324.7206115722656, "logps/rejected": -227.92291259765625, "loss": 0.3883, "rewards/accuracies": 0.75, "rewards/chosen": 0.41919419169425964, "rewards/margins": 2.2573773860931396, "rewards/rejected": -1.8381831645965576, "step": 7464 }, { "epoch": 0.86, "learning_rate": 4.245581177572281e-08, "logits/chosen": -3.573768138885498, "logits/rejected": -3.5019264221191406, "logps/chosen": -244.967529296875, "logps/rejected": -179.06808471679688, "loss": 0.3743, "rewards/accuracies": 0.75, "rewards/chosen": -0.759601354598999, "rewards/margins": 1.6167445182800293, "rewards/rejected": -2.3763458728790283, "step": 7465 }, { "epoch": 0.86, "learning_rate": 4.242069530609856e-08, "logits/chosen": -3.2868311405181885, "logits/rejected": -2.9286670684814453, "logps/chosen": -280.3487854003906, "logps/rejected": -325.92730712890625, "loss": 0.5011, "rewards/accuracies": 0.75, "rewards/chosen": -0.9654921293258667, "rewards/margins": 1.4948618412017822, "rewards/rejected": -2.4603538513183594, "step": 7466 }, { "epoch": 0.86, "learning_rate": 4.23855788364743e-08, "logits/chosen": -3.278658866882324, "logits/rejected": -2.9004158973693848, "logps/chosen": -260.00482177734375, "logps/rejected": -203.32496643066406, "loss": 0.2063, "rewards/accuracies": 1.0, "rewards/chosen": 0.24211648106575012, "rewards/margins": 2.291813373565674, "rewards/rejected": -2.049696922302246, "step": 7467 }, { "epoch": 0.86, "learning_rate": 4.2350462366850056e-08, "logits/chosen": -2.5139119625091553, "logits/rejected": -2.918936014175415, "logps/chosen": -278.2324523925781, "logps/rejected": -265.64056396484375, "loss": 0.2222, "rewards/accuracies": 0.75, "rewards/chosen": 0.008336499333381653, "rewards/margins": 3.565913677215576, "rewards/rejected": -3.557577133178711, "step": 7468 }, { "epoch": 0.86, "learning_rate": 4.2315345897225797e-08, "logits/chosen": -3.3000893592834473, "logits/rejected": -3.266570806503296, "logps/chosen": -252.02493286132812, "logps/rejected": -229.1265411376953, "loss": 0.5732, "rewards/accuracies": 0.875, "rewards/chosen": 0.1088513657450676, "rewards/margins": 1.0406867265701294, "rewards/rejected": -0.931835412979126, "step": 7469 }, { "epoch": 0.86, "learning_rate": 4.2280229427601544e-08, "logits/chosen": -2.84344482421875, "logits/rejected": -2.956928014755249, "logps/chosen": -547.812255859375, "logps/rejected": -253.36886596679688, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.21478500962257385, "rewards/margins": 2.367927074432373, "rewards/rejected": -2.582712173461914, "step": 7470 }, { "epoch": 0.86, "learning_rate": 4.224511295797729e-08, "logits/chosen": -3.685360908508301, "logits/rejected": -3.652024984359741, "logps/chosen": -196.8019256591797, "logps/rejected": -166.2613525390625, "loss": 0.3634, "rewards/accuracies": 0.875, "rewards/chosen": -0.04689069092273712, "rewards/margins": 1.3754754066467285, "rewards/rejected": -1.4223662614822388, "step": 7471 }, { "epoch": 0.86, "learning_rate": 4.220999648835304e-08, "logits/chosen": -3.6516458988189697, "logits/rejected": -3.679670810699463, "logps/chosen": -365.682861328125, "logps/rejected": -321.343994140625, "loss": 0.5659, "rewards/accuracies": 0.75, "rewards/chosen": -0.6450765132904053, "rewards/margins": 3.3201582431793213, "rewards/rejected": -3.9652347564697266, "step": 7472 }, { "epoch": 0.86, "learning_rate": 4.217488001872878e-08, "logits/chosen": -2.4299967288970947, "logits/rejected": -2.47464656829834, "logps/chosen": -228.9652862548828, "logps/rejected": -282.2402648925781, "loss": 0.3287, "rewards/accuracies": 0.75, "rewards/chosen": 0.35253578424453735, "rewards/margins": 2.2304391860961914, "rewards/rejected": -1.877903699874878, "step": 7473 }, { "epoch": 0.86, "learning_rate": 4.213976354910453e-08, "logits/chosen": -3.2873289585113525, "logits/rejected": -3.3333115577697754, "logps/chosen": -163.8485107421875, "logps/rejected": -237.61489868164062, "loss": 0.2501, "rewards/accuracies": 1.0, "rewards/chosen": 0.5246586799621582, "rewards/margins": 2.243171453475952, "rewards/rejected": -1.718512773513794, "step": 7474 }, { "epoch": 0.86, "learning_rate": 4.2104647079480273e-08, "logits/chosen": -2.3115234375, "logits/rejected": -2.742823362350464, "logps/chosen": -277.1209411621094, "logps/rejected": -258.4232177734375, "loss": 0.4136, "rewards/accuracies": 0.625, "rewards/chosen": -0.029480498284101486, "rewards/margins": 1.3066778182983398, "rewards/rejected": -1.3361583948135376, "step": 7475 }, { "epoch": 0.86, "learning_rate": 4.206953060985603e-08, "logits/chosen": -3.0496387481689453, "logits/rejected": -2.8698458671569824, "logps/chosen": -350.2351989746094, "logps/rejected": -324.0829772949219, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": 0.7080439329147339, "rewards/margins": 3.947234869003296, "rewards/rejected": -3.2391910552978516, "step": 7476 }, { "epoch": 0.86, "learning_rate": 4.203441414023177e-08, "logits/chosen": -3.299542188644409, "logits/rejected": -3.1708617210388184, "logps/chosen": -339.5414733886719, "logps/rejected": -347.48199462890625, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": -0.2525096833705902, "rewards/margins": 2.4536845684051514, "rewards/rejected": -2.7061944007873535, "step": 7477 }, { "epoch": 0.86, "learning_rate": 4.1999297670607515e-08, "logits/chosen": -3.6724610328674316, "logits/rejected": -3.360605239868164, "logps/chosen": -248.93826293945312, "logps/rejected": -267.2522888183594, "loss": 0.6627, "rewards/accuracies": 0.625, "rewards/chosen": -0.6572730541229248, "rewards/margins": 1.0041768550872803, "rewards/rejected": -1.6614497900009155, "step": 7478 }, { "epoch": 0.86, "learning_rate": 4.196418120098326e-08, "logits/chosen": -3.0134902000427246, "logits/rejected": -3.123918056488037, "logps/chosen": -389.127685546875, "logps/rejected": -264.9132080078125, "loss": 0.1804, "rewards/accuracies": 0.875, "rewards/chosen": 0.14779280126094818, "rewards/margins": 2.4338390827178955, "rewards/rejected": -2.286046266555786, "step": 7479 }, { "epoch": 0.86, "learning_rate": 4.192906473135901e-08, "logits/chosen": -3.0954551696777344, "logits/rejected": -3.039759397506714, "logps/chosen": -435.6393127441406, "logps/rejected": -275.943603515625, "loss": 0.2179, "rewards/accuracies": 1.0, "rewards/chosen": 0.7951943874359131, "rewards/margins": 2.22257399559021, "rewards/rejected": -1.4273797273635864, "step": 7480 }, { "epoch": 0.86, "learning_rate": 4.189394826173475e-08, "logits/chosen": -3.071462869644165, "logits/rejected": -2.8943090438842773, "logps/chosen": -262.9268798828125, "logps/rejected": -265.0702209472656, "loss": 0.2635, "rewards/accuracies": 0.875, "rewards/chosen": 0.11157265305519104, "rewards/margins": 2.0797624588012695, "rewards/rejected": -1.9681898355484009, "step": 7481 }, { "epoch": 0.86, "learning_rate": 4.185883179211049e-08, "logits/chosen": -3.4534237384796143, "logits/rejected": -3.446528911590576, "logps/chosen": -318.72967529296875, "logps/rejected": -313.9820556640625, "loss": 0.3996, "rewards/accuracies": 0.875, "rewards/chosen": 0.24750715494155884, "rewards/margins": 1.840911865234375, "rewards/rejected": -1.5934045314788818, "step": 7482 }, { "epoch": 0.86, "learning_rate": 4.1823715322486245e-08, "logits/chosen": -3.7844297885894775, "logits/rejected": -3.8877103328704834, "logps/chosen": -244.54388427734375, "logps/rejected": -232.22509765625, "loss": 0.263, "rewards/accuracies": 0.875, "rewards/chosen": -0.08492715656757355, "rewards/margins": 3.0766897201538086, "rewards/rejected": -3.1616170406341553, "step": 7483 }, { "epoch": 0.86, "learning_rate": 4.1788598852861985e-08, "logits/chosen": -2.9961469173431396, "logits/rejected": -2.7090096473693848, "logps/chosen": -260.53289794921875, "logps/rejected": -286.3724365234375, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": 0.26659414172172546, "rewards/margins": 2.78131103515625, "rewards/rejected": -2.5147171020507812, "step": 7484 }, { "epoch": 0.86, "learning_rate": 4.175348238323774e-08, "logits/chosen": -2.8050222396850586, "logits/rejected": -2.820755958557129, "logps/chosen": -152.08262634277344, "logps/rejected": -117.54035949707031, "loss": 0.408, "rewards/accuracies": 0.875, "rewards/chosen": -0.0013656988739967346, "rewards/margins": 1.194110631942749, "rewards/rejected": -1.1954764127731323, "step": 7485 }, { "epoch": 0.86, "learning_rate": 4.171836591361348e-08, "logits/chosen": -3.3114819526672363, "logits/rejected": -2.991631031036377, "logps/chosen": -315.25103759765625, "logps/rejected": -169.7267303466797, "loss": 0.5911, "rewards/accuracies": 0.75, "rewards/chosen": -0.09069804847240448, "rewards/margins": 1.2933461666107178, "rewards/rejected": -1.3840441703796387, "step": 7486 }, { "epoch": 0.86, "learning_rate": 4.168324944398923e-08, "logits/chosen": -3.181676149368286, "logits/rejected": -2.9140467643737793, "logps/chosen": -418.30523681640625, "logps/rejected": -312.2193603515625, "loss": 0.2735, "rewards/accuracies": 0.875, "rewards/chosen": -0.40348368883132935, "rewards/margins": 2.4174458980560303, "rewards/rejected": -2.820929527282715, "step": 7487 }, { "epoch": 0.86, "learning_rate": 4.1648132974364974e-08, "logits/chosen": -3.116424560546875, "logits/rejected": -2.909649610519409, "logps/chosen": -275.90972900390625, "logps/rejected": -311.2938232421875, "loss": 0.3671, "rewards/accuracies": 0.75, "rewards/chosen": 0.32641926407814026, "rewards/margins": 1.2905763387680054, "rewards/rejected": -0.9641571044921875, "step": 7488 }, { "epoch": 0.86, "learning_rate": 4.161301650474072e-08, "logits/chosen": -3.090491771697998, "logits/rejected": -3.1310951709747314, "logps/chosen": -254.7110595703125, "logps/rejected": -272.84161376953125, "loss": 0.6334, "rewards/accuracies": 0.625, "rewards/chosen": -0.47798240184783936, "rewards/margins": 1.0938502550125122, "rewards/rejected": -1.5718326568603516, "step": 7489 }, { "epoch": 0.86, "learning_rate": 4.157790003511646e-08, "logits/chosen": -3.623589038848877, "logits/rejected": -2.9990170001983643, "logps/chosen": -568.4430541992188, "logps/rejected": -363.9608459472656, "loss": 0.3357, "rewards/accuracies": 0.875, "rewards/chosen": -0.9769412279129028, "rewards/margins": 2.153909683227539, "rewards/rejected": -3.1308507919311523, "step": 7490 }, { "epoch": 0.86, "learning_rate": 4.1542783565492216e-08, "logits/chosen": -2.6676650047302246, "logits/rejected": -2.611044406890869, "logps/chosen": -230.71136474609375, "logps/rejected": -247.94361877441406, "loss": 0.5683, "rewards/accuracies": 0.625, "rewards/chosen": -0.5342506170272827, "rewards/margins": 0.5014493465423584, "rewards/rejected": -1.0356999635696411, "step": 7491 }, { "epoch": 0.86, "learning_rate": 4.1507667095867957e-08, "logits/chosen": -2.454514265060425, "logits/rejected": -2.389425039291382, "logps/chosen": -384.9090576171875, "logps/rejected": -474.991455078125, "loss": 0.4961, "rewards/accuracies": 0.75, "rewards/chosen": -0.16366291046142578, "rewards/margins": 1.14713716506958, "rewards/rejected": -1.3108000755310059, "step": 7492 }, { "epoch": 0.86, "learning_rate": 4.147255062624371e-08, "logits/chosen": -2.606959581375122, "logits/rejected": -2.4475574493408203, "logps/chosen": -324.0587158203125, "logps/rejected": -186.76583862304688, "loss": 0.8865, "rewards/accuracies": 0.5, "rewards/chosen": -0.2293229103088379, "rewards/margins": 0.16705404222011566, "rewards/rejected": -0.39637693762779236, "step": 7493 }, { "epoch": 0.86, "learning_rate": 4.143743415661945e-08, "logits/chosen": -3.3281681537628174, "logits/rejected": -3.3591434955596924, "logps/chosen": -164.9707794189453, "logps/rejected": -180.0811309814453, "loss": 0.3258, "rewards/accuracies": 0.75, "rewards/chosen": 0.05662701278924942, "rewards/margins": 1.291383981704712, "rewards/rejected": -1.2347568273544312, "step": 7494 }, { "epoch": 0.86, "learning_rate": 4.14023176869952e-08, "logits/chosen": -3.0743939876556396, "logits/rejected": -3.1544923782348633, "logps/chosen": -384.84295654296875, "logps/rejected": -375.42315673828125, "loss": 0.9013, "rewards/accuracies": 0.375, "rewards/chosen": -0.21830201148986816, "rewards/margins": -0.1918463110923767, "rewards/rejected": -0.02645571529865265, "step": 7495 }, { "epoch": 0.86, "learning_rate": 4.1367201217370945e-08, "logits/chosen": -3.3725266456604004, "logits/rejected": -3.035233736038208, "logps/chosen": -420.79559326171875, "logps/rejected": -261.7130126953125, "loss": 0.2886, "rewards/accuracies": 1.0, "rewards/chosen": 0.10130177438259125, "rewards/margins": 1.6438865661621094, "rewards/rejected": -1.542584776878357, "step": 7496 }, { "epoch": 0.86, "learning_rate": 4.133208474774669e-08, "logits/chosen": -2.9534573554992676, "logits/rejected": -2.749403715133667, "logps/chosen": -191.3076171875, "logps/rejected": -219.6448974609375, "loss": 0.5168, "rewards/accuracies": 0.75, "rewards/chosen": 0.46717843413352966, "rewards/margins": 2.3963229656219482, "rewards/rejected": -1.9291445016860962, "step": 7497 }, { "epoch": 0.86, "learning_rate": 4.129696827812243e-08, "logits/chosen": -2.8842101097106934, "logits/rejected": -3.069355010986328, "logps/chosen": -185.71743774414062, "logps/rejected": -290.4529113769531, "loss": 0.235, "rewards/accuracies": 1.0, "rewards/chosen": 0.1757672131061554, "rewards/margins": 2.003011703491211, "rewards/rejected": -1.827244520187378, "step": 7498 }, { "epoch": 0.86, "learning_rate": 4.126185180849819e-08, "logits/chosen": -3.3114445209503174, "logits/rejected": -3.054583787918091, "logps/chosen": -405.69580078125, "logps/rejected": -415.8905029296875, "loss": 0.4532, "rewards/accuracies": 0.875, "rewards/chosen": -0.14278483390808105, "rewards/margins": 1.3095752000808716, "rewards/rejected": -1.452359914779663, "step": 7499 }, { "epoch": 0.86, "learning_rate": 4.122673533887393e-08, "logits/chosen": -1.9591503143310547, "logits/rejected": -1.8553916215896606, "logps/chosen": -246.1662139892578, "logps/rejected": -344.12335205078125, "loss": 0.3975, "rewards/accuracies": 0.75, "rewards/chosen": -0.0316905602812767, "rewards/margins": 1.7500603199005127, "rewards/rejected": -1.7817507982254028, "step": 7500 }, { "epoch": 0.86, "learning_rate": 4.1191618869249675e-08, "logits/chosen": -2.514021396636963, "logits/rejected": -2.9005823135375977, "logps/chosen": -197.38539123535156, "logps/rejected": -261.93023681640625, "loss": 0.373, "rewards/accuracies": 0.875, "rewards/chosen": -0.07319873571395874, "rewards/margins": 2.0075764656066895, "rewards/rejected": -2.080775260925293, "step": 7501 }, { "epoch": 0.86, "learning_rate": 4.115650239962542e-08, "logits/chosen": -2.884659767150879, "logits/rejected": -2.8863797187805176, "logps/chosen": -175.53038024902344, "logps/rejected": -196.93392944335938, "loss": 0.3917, "rewards/accuracies": 0.75, "rewards/chosen": 0.11727648973464966, "rewards/margins": 1.8456131219863892, "rewards/rejected": -1.7283365726470947, "step": 7502 }, { "epoch": 0.86, "learning_rate": 4.112138593000117e-08, "logits/chosen": -3.177049160003662, "logits/rejected": -3.5217041969299316, "logps/chosen": -287.2966613769531, "logps/rejected": -221.2364501953125, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": -0.07297545671463013, "rewards/margins": 1.7879159450531006, "rewards/rejected": -1.860891342163086, "step": 7503 }, { "epoch": 0.87, "learning_rate": 4.108626946037691e-08, "logits/chosen": -3.1697311401367188, "logits/rejected": -3.2719602584838867, "logps/chosen": -231.26246643066406, "logps/rejected": -236.20660400390625, "loss": 0.5863, "rewards/accuracies": 0.625, "rewards/chosen": -0.26350268721580505, "rewards/margins": 1.2640931606292725, "rewards/rejected": -1.5275959968566895, "step": 7504 }, { "epoch": 0.87, "learning_rate": 4.1051152990752664e-08, "logits/chosen": -2.5208098888397217, "logits/rejected": -2.491269588470459, "logps/chosen": -260.0534362792969, "logps/rejected": -430.389404296875, "loss": 0.5539, "rewards/accuracies": 0.625, "rewards/chosen": -0.3080190420150757, "rewards/margins": 1.37216317653656, "rewards/rejected": -1.6801822185516357, "step": 7505 }, { "epoch": 0.87, "learning_rate": 4.1016036521128405e-08, "logits/chosen": -3.1013498306274414, "logits/rejected": -3.210181474685669, "logps/chosen": -123.26429748535156, "logps/rejected": -237.262451171875, "loss": 0.4469, "rewards/accuracies": 0.75, "rewards/chosen": -0.6994352340698242, "rewards/margins": 1.352970004081726, "rewards/rejected": -2.0524051189422607, "step": 7506 }, { "epoch": 0.87, "learning_rate": 4.098092005150416e-08, "logits/chosen": -3.3366494178771973, "logits/rejected": -3.5089378356933594, "logps/chosen": -196.43618774414062, "logps/rejected": -243.03231811523438, "loss": 0.4846, "rewards/accuracies": 0.625, "rewards/chosen": 0.515826940536499, "rewards/margins": 1.2663086652755737, "rewards/rejected": -0.7504817247390747, "step": 7507 }, { "epoch": 0.87, "learning_rate": 4.09458035818799e-08, "logits/chosen": -2.879525899887085, "logits/rejected": -2.9187309741973877, "logps/chosen": -379.3719482421875, "logps/rejected": -393.375244140625, "loss": 0.4655, "rewards/accuracies": 0.625, "rewards/chosen": -0.27074727416038513, "rewards/margins": 1.5144315958023071, "rewards/rejected": -1.785178780555725, "step": 7508 }, { "epoch": 0.87, "learning_rate": 4.0910687112255646e-08, "logits/chosen": -3.0926856994628906, "logits/rejected": -3.245452880859375, "logps/chosen": -357.85064697265625, "logps/rejected": -420.8505554199219, "loss": 0.7255, "rewards/accuracies": 0.625, "rewards/chosen": -0.06324917823076248, "rewards/margins": 0.33078742027282715, "rewards/rejected": -0.39403659105300903, "step": 7509 }, { "epoch": 0.87, "learning_rate": 4.0875570642631394e-08, "logits/chosen": -3.3347227573394775, "logits/rejected": -3.516310453414917, "logps/chosen": -356.01812744140625, "logps/rejected": -315.1039123535156, "loss": 0.8123, "rewards/accuracies": 0.5, "rewards/chosen": -0.6355506181716919, "rewards/margins": 1.016275405883789, "rewards/rejected": -1.6518261432647705, "step": 7510 }, { "epoch": 0.87, "learning_rate": 4.084045417300714e-08, "logits/chosen": -3.159053325653076, "logits/rejected": -3.204591751098633, "logps/chosen": -108.9964370727539, "logps/rejected": -346.22528076171875, "loss": 0.1487, "rewards/accuracies": 1.0, "rewards/chosen": 0.23483017086982727, "rewards/margins": 4.039377212524414, "rewards/rejected": -3.804547071456909, "step": 7511 }, { "epoch": 0.87, "learning_rate": 4.080533770338288e-08, "logits/chosen": -3.118699073791504, "logits/rejected": -3.27553391456604, "logps/chosen": -440.9292907714844, "logps/rejected": -382.7550048828125, "loss": 0.2174, "rewards/accuracies": 0.875, "rewards/chosen": -0.19414466619491577, "rewards/margins": 3.3705883026123047, "rewards/rejected": -3.564732789993286, "step": 7512 }, { "epoch": 0.87, "learning_rate": 4.0770221233758635e-08, "logits/chosen": -3.103029251098633, "logits/rejected": -3.4165267944335938, "logps/chosen": -301.21356201171875, "logps/rejected": -225.13328552246094, "loss": 0.6869, "rewards/accuracies": 0.625, "rewards/chosen": -0.9004427194595337, "rewards/margins": 0.9406253099441528, "rewards/rejected": -1.8410680294036865, "step": 7513 }, { "epoch": 0.87, "learning_rate": 4.0735104764134376e-08, "logits/chosen": -3.385509967803955, "logits/rejected": -3.5735716819763184, "logps/chosen": -327.01092529296875, "logps/rejected": -266.5755920410156, "loss": 0.3032, "rewards/accuracies": 1.0, "rewards/chosen": 0.39524391293525696, "rewards/margins": 2.1370673179626465, "rewards/rejected": -1.741823434829712, "step": 7514 }, { "epoch": 0.87, "learning_rate": 4.069998829451012e-08, "logits/chosen": -2.763573169708252, "logits/rejected": -2.8655359745025635, "logps/chosen": -379.47528076171875, "logps/rejected": -254.66253662109375, "loss": 0.6002, "rewards/accuracies": 0.625, "rewards/chosen": -0.4422298073768616, "rewards/margins": 1.366270661354065, "rewards/rejected": -1.8085005283355713, "step": 7515 }, { "epoch": 0.87, "learning_rate": 4.066487182488587e-08, "logits/chosen": -2.4003007411956787, "logits/rejected": -2.61470627784729, "logps/chosen": -375.8123779296875, "logps/rejected": -239.82846069335938, "loss": 0.2216, "rewards/accuracies": 1.0, "rewards/chosen": 0.38269561529159546, "rewards/margins": 2.160046339035034, "rewards/rejected": -1.7773507833480835, "step": 7516 }, { "epoch": 0.87, "learning_rate": 4.062975535526162e-08, "logits/chosen": -2.7294671535491943, "logits/rejected": -2.695668935775757, "logps/chosen": -227.7852020263672, "logps/rejected": -243.02723693847656, "loss": 0.2116, "rewards/accuracies": 0.875, "rewards/chosen": 0.16507235169410706, "rewards/margins": 2.392014265060425, "rewards/rejected": -2.2269418239593506, "step": 7517 }, { "epoch": 0.87, "learning_rate": 4.059463888563736e-08, "logits/chosen": -3.5599355697631836, "logits/rejected": -3.4366002082824707, "logps/chosen": -222.60365295410156, "logps/rejected": -233.5117645263672, "loss": 0.1625, "rewards/accuracies": 0.875, "rewards/chosen": 0.6110637187957764, "rewards/margins": 3.060159921646118, "rewards/rejected": -2.449096441268921, "step": 7518 }, { "epoch": 0.87, "learning_rate": 4.055952241601311e-08, "logits/chosen": -3.574110984802246, "logits/rejected": -3.231353998184204, "logps/chosen": -220.97280883789062, "logps/rejected": -162.0476531982422, "loss": 0.326, "rewards/accuracies": 0.75, "rewards/chosen": 0.3030501902103424, "rewards/margins": 1.923255205154419, "rewards/rejected": -1.6202049255371094, "step": 7519 }, { "epoch": 0.87, "learning_rate": 4.052440594638885e-08, "logits/chosen": -2.9844348430633545, "logits/rejected": -3.0909149646759033, "logps/chosen": -362.608642578125, "logps/rejected": -183.5186004638672, "loss": 0.295, "rewards/accuracies": 0.875, "rewards/chosen": -0.19992396235466003, "rewards/margins": 1.9291026592254639, "rewards/rejected": -2.1290266513824463, "step": 7520 }, { "epoch": 0.87, "learning_rate": 4.0489289476764607e-08, "logits/chosen": -3.32062029838562, "logits/rejected": -2.881941318511963, "logps/chosen": -190.5640106201172, "logps/rejected": -229.21377563476562, "loss": 0.5446, "rewards/accuracies": 0.5, "rewards/chosen": -0.2819797992706299, "rewards/margins": 1.2030670642852783, "rewards/rejected": -1.4850468635559082, "step": 7521 }, { "epoch": 0.87, "learning_rate": 4.045417300714035e-08, "logits/chosen": -2.8962180614471436, "logits/rejected": -2.7982969284057617, "logps/chosen": -273.21417236328125, "logps/rejected": -374.62750244140625, "loss": 0.211, "rewards/accuracies": 1.0, "rewards/chosen": 0.14779920876026154, "rewards/margins": 2.698577642440796, "rewards/rejected": -2.550778388977051, "step": 7522 }, { "epoch": 0.87, "learning_rate": 4.0419056537516094e-08, "logits/chosen": -3.433238983154297, "logits/rejected": -3.199942111968994, "logps/chosen": -289.3759765625, "logps/rejected": -242.98733520507812, "loss": 0.2824, "rewards/accuracies": 0.875, "rewards/chosen": -0.03205490857362747, "rewards/margins": 2.0512232780456543, "rewards/rejected": -2.083278179168701, "step": 7523 }, { "epoch": 0.87, "learning_rate": 4.038394006789184e-08, "logits/chosen": -4.248073577880859, "logits/rejected": -3.8847827911376953, "logps/chosen": -257.0155334472656, "logps/rejected": -194.49642944335938, "loss": 0.5694, "rewards/accuracies": 0.625, "rewards/chosen": -0.5786929726600647, "rewards/margins": 0.7365216016769409, "rewards/rejected": -1.3152146339416504, "step": 7524 }, { "epoch": 0.87, "learning_rate": 4.034882359826759e-08, "logits/chosen": -3.0310347080230713, "logits/rejected": -3.1769967079162598, "logps/chosen": -421.77264404296875, "logps/rejected": -248.3815155029297, "loss": 0.6999, "rewards/accuracies": 0.75, "rewards/chosen": -0.5656816959381104, "rewards/margins": 1.5136704444885254, "rewards/rejected": -2.0793521404266357, "step": 7525 }, { "epoch": 0.87, "learning_rate": 4.031370712864333e-08, "logits/chosen": -2.5610783100128174, "logits/rejected": -2.6042449474334717, "logps/chosen": -148.31878662109375, "logps/rejected": -257.6938171386719, "loss": 0.2598, "rewards/accuracies": 0.875, "rewards/chosen": 0.029988639056682587, "rewards/margins": 3.3818955421447754, "rewards/rejected": -3.3519067764282227, "step": 7526 }, { "epoch": 0.87, "learning_rate": 4.0278590659019083e-08, "logits/chosen": -3.9426746368408203, "logits/rejected": -3.89273738861084, "logps/chosen": -197.83651733398438, "logps/rejected": -188.46759033203125, "loss": 0.2264, "rewards/accuracies": 0.875, "rewards/chosen": -0.015062347054481506, "rewards/margins": 2.5096335411071777, "rewards/rejected": -2.524695873260498, "step": 7527 }, { "epoch": 0.87, "learning_rate": 4.0243474189394824e-08, "logits/chosen": -3.5719757080078125, "logits/rejected": -3.159079074859619, "logps/chosen": -302.1076965332031, "logps/rejected": -284.2590637207031, "loss": 0.5131, "rewards/accuracies": 0.625, "rewards/chosen": -0.4441414773464203, "rewards/margins": 1.3435890674591064, "rewards/rejected": -1.7877305746078491, "step": 7528 }, { "epoch": 0.87, "learning_rate": 4.0208357719770565e-08, "logits/chosen": -2.519930839538574, "logits/rejected": -2.7203004360198975, "logps/chosen": -362.00946044921875, "logps/rejected": -297.5560302734375, "loss": 0.2791, "rewards/accuracies": 0.875, "rewards/chosen": -0.29538097977638245, "rewards/margins": 1.7654409408569336, "rewards/rejected": -2.060822010040283, "step": 7529 }, { "epoch": 0.87, "learning_rate": 4.017324125014632e-08, "logits/chosen": -3.2347335815429688, "logits/rejected": -3.497551202774048, "logps/chosen": -320.5330810546875, "logps/rejected": -349.33648681640625, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": -0.04571977257728577, "rewards/margins": 1.0934126377105713, "rewards/rejected": -1.1391323804855347, "step": 7530 }, { "epoch": 0.87, "learning_rate": 4.013812478052206e-08, "logits/chosen": -3.31465482711792, "logits/rejected": -3.561979293823242, "logps/chosen": -316.0699462890625, "logps/rejected": -204.8778076171875, "loss": 0.3241, "rewards/accuracies": 1.0, "rewards/chosen": -0.24360236525535583, "rewards/margins": 1.8318272829055786, "rewards/rejected": -2.075429677963257, "step": 7531 }, { "epoch": 0.87, "learning_rate": 4.0103008310897806e-08, "logits/chosen": -3.580631732940674, "logits/rejected": -3.8744168281555176, "logps/chosen": -189.57240295410156, "logps/rejected": -237.7109375, "loss": 0.3708, "rewards/accuracies": 0.625, "rewards/chosen": -0.36793458461761475, "rewards/margins": 1.5106291770935059, "rewards/rejected": -1.8785638809204102, "step": 7532 }, { "epoch": 0.87, "learning_rate": 4.0067891841273554e-08, "logits/chosen": -2.4814884662628174, "logits/rejected": -2.549510955810547, "logps/chosen": -223.60247802734375, "logps/rejected": -226.96255493164062, "loss": 0.3174, "rewards/accuracies": 0.875, "rewards/chosen": -0.40669548511505127, "rewards/margins": 1.7812680006027222, "rewards/rejected": -2.1879634857177734, "step": 7533 }, { "epoch": 0.87, "learning_rate": 4.00327753716493e-08, "logits/chosen": -2.2782785892486572, "logits/rejected": -2.2890071868896484, "logps/chosen": -482.6736145019531, "logps/rejected": -409.7596740722656, "loss": 0.4736, "rewards/accuracies": 0.75, "rewards/chosen": -0.263846755027771, "rewards/margins": 2.0601847171783447, "rewards/rejected": -2.324031352996826, "step": 7534 }, { "epoch": 0.87, "learning_rate": 3.999765890202504e-08, "logits/chosen": -3.291430711746216, "logits/rejected": -3.0477380752563477, "logps/chosen": -160.39877319335938, "logps/rejected": -149.31155395507812, "loss": 0.5069, "rewards/accuracies": 0.75, "rewards/chosen": 0.298551470041275, "rewards/margins": 0.9882910847663879, "rewards/rejected": -0.6897397041320801, "step": 7535 }, { "epoch": 0.87, "learning_rate": 3.9962542432400795e-08, "logits/chosen": -3.5985238552093506, "logits/rejected": -3.789578914642334, "logps/chosen": -300.0981140136719, "logps/rejected": -290.34930419921875, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": 0.28566309809684753, "rewards/margins": 2.4673538208007812, "rewards/rejected": -2.1816906929016113, "step": 7536 }, { "epoch": 0.87, "learning_rate": 3.9927425962776536e-08, "logits/chosen": -3.0996973514556885, "logits/rejected": -3.2523012161254883, "logps/chosen": -93.00965118408203, "logps/rejected": -167.6415252685547, "loss": 0.2692, "rewards/accuracies": 0.875, "rewards/chosen": 0.1543428897857666, "rewards/margins": 2.309688091278076, "rewards/rejected": -2.1553452014923096, "step": 7537 }, { "epoch": 0.87, "learning_rate": 3.989230949315229e-08, "logits/chosen": -3.614081382751465, "logits/rejected": -3.0137524604797363, "logps/chosen": -420.4374694824219, "logps/rejected": -291.822509765625, "loss": 0.2361, "rewards/accuracies": 0.75, "rewards/chosen": 0.0666956752538681, "rewards/margins": 2.596250295639038, "rewards/rejected": -2.529554843902588, "step": 7538 }, { "epoch": 0.87, "learning_rate": 3.985719302352803e-08, "logits/chosen": -2.2222652435302734, "logits/rejected": -2.005113124847412, "logps/chosen": -150.3624267578125, "logps/rejected": -277.3478088378906, "loss": 0.2177, "rewards/accuracies": 1.0, "rewards/chosen": 0.22399689257144928, "rewards/margins": 1.8131697177886963, "rewards/rejected": -1.5891728401184082, "step": 7539 }, { "epoch": 0.87, "learning_rate": 3.982207655390378e-08, "logits/chosen": -2.625220775604248, "logits/rejected": -2.5450353622436523, "logps/chosen": -577.5968017578125, "logps/rejected": -337.7035217285156, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -0.17025962471961975, "rewards/margins": 1.8822531700134277, "rewards/rejected": -2.0525126457214355, "step": 7540 }, { "epoch": 0.87, "learning_rate": 3.9786960084279525e-08, "logits/chosen": -2.866905689239502, "logits/rejected": -2.8613975048065186, "logps/chosen": -262.69256591796875, "logps/rejected": -273.4716796875, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": 0.6296974420547485, "rewards/margins": 1.670243501663208, "rewards/rejected": -1.0405460596084595, "step": 7541 }, { "epoch": 0.87, "learning_rate": 3.975184361465527e-08, "logits/chosen": -2.6864123344421387, "logits/rejected": -2.748539447784424, "logps/chosen": -211.3208770751953, "logps/rejected": -346.57586669921875, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": 0.5239019989967346, "rewards/margins": 2.248486042022705, "rewards/rejected": -1.7245843410491943, "step": 7542 }, { "epoch": 0.87, "learning_rate": 3.971672714503101e-08, "logits/chosen": -3.701113224029541, "logits/rejected": -3.444645881652832, "logps/chosen": -307.62066650390625, "logps/rejected": -141.19769287109375, "loss": 0.3325, "rewards/accuracies": 0.875, "rewards/chosen": -0.0024237819015979767, "rewards/margins": 1.8553813695907593, "rewards/rejected": -1.8578052520751953, "step": 7543 }, { "epoch": 0.87, "learning_rate": 3.9681610675406767e-08, "logits/chosen": -3.069775104522705, "logits/rejected": -3.1007938385009766, "logps/chosen": -233.069091796875, "logps/rejected": -181.84300231933594, "loss": 0.4102, "rewards/accuracies": 0.75, "rewards/chosen": -0.2961309254169464, "rewards/margins": 1.0707422494888306, "rewards/rejected": -1.3668732643127441, "step": 7544 }, { "epoch": 0.87, "learning_rate": 3.964649420578251e-08, "logits/chosen": -3.6050350666046143, "logits/rejected": -3.361511468887329, "logps/chosen": -189.4403533935547, "logps/rejected": -212.89736938476562, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.17795979976654053, "rewards/margins": 1.754941463470459, "rewards/rejected": -1.932901382446289, "step": 7545 }, { "epoch": 0.87, "learning_rate": 3.961137773615826e-08, "logits/chosen": -3.1898300647735596, "logits/rejected": -2.81356143951416, "logps/chosen": -215.05674743652344, "logps/rejected": -209.92745971679688, "loss": 0.2356, "rewards/accuracies": 1.0, "rewards/chosen": 0.2028280794620514, "rewards/margins": 1.6892858743667603, "rewards/rejected": -1.4864578247070312, "step": 7546 }, { "epoch": 0.87, "learning_rate": 3.9576261266534e-08, "logits/chosen": -3.5576388835906982, "logits/rejected": -3.5651230812072754, "logps/chosen": -428.13818359375, "logps/rejected": -263.2501220703125, "loss": 0.3199, "rewards/accuracies": 0.875, "rewards/chosen": -0.11587333679199219, "rewards/margins": 1.9149330854415894, "rewards/rejected": -2.030806303024292, "step": 7547 }, { "epoch": 0.87, "learning_rate": 3.954114479690975e-08, "logits/chosen": -2.6316440105438232, "logits/rejected": -3.162478446960449, "logps/chosen": -251.7567901611328, "logps/rejected": -264.64935302734375, "loss": 0.65, "rewards/accuracies": 0.75, "rewards/chosen": -0.11990400403738022, "rewards/margins": 1.2034716606140137, "rewards/rejected": -1.3233757019042969, "step": 7548 }, { "epoch": 0.87, "learning_rate": 3.950602832728549e-08, "logits/chosen": -3.2327375411987305, "logits/rejected": -3.2179059982299805, "logps/chosen": -221.9896240234375, "logps/rejected": -221.55279541015625, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": -0.41803479194641113, "rewards/margins": 1.3247987031936646, "rewards/rejected": -1.7428333759307861, "step": 7549 }, { "epoch": 0.87, "learning_rate": 3.9470911857661243e-08, "logits/chosen": -2.3771135807037354, "logits/rejected": -2.471571445465088, "logps/chosen": -329.99822998046875, "logps/rejected": -350.55645751953125, "loss": 0.3153, "rewards/accuracies": 0.875, "rewards/chosen": 0.046194493770599365, "rewards/margins": 1.6235947608947754, "rewards/rejected": -1.5774002075195312, "step": 7550 }, { "epoch": 0.87, "learning_rate": 3.9435795388036984e-08, "logits/chosen": -3.066906213760376, "logits/rejected": -2.8141098022460938, "logps/chosen": -396.1695556640625, "logps/rejected": -274.3712158203125, "loss": 0.3162, "rewards/accuracies": 0.875, "rewards/chosen": -0.4541410207748413, "rewards/margins": 1.4618275165557861, "rewards/rejected": -1.915968656539917, "step": 7551 }, { "epoch": 0.87, "learning_rate": 3.940067891841274e-08, "logits/chosen": -2.9881432056427, "logits/rejected": -2.752713680267334, "logps/chosen": -182.09982299804688, "logps/rejected": -306.08251953125, "loss": 0.4522, "rewards/accuracies": 0.625, "rewards/chosen": -0.32706135511398315, "rewards/margins": 1.5259308815002441, "rewards/rejected": -1.8529921770095825, "step": 7552 }, { "epoch": 0.87, "learning_rate": 3.936556244878848e-08, "logits/chosen": -3.5800466537475586, "logits/rejected": -3.587484121322632, "logps/chosen": -291.1708679199219, "logps/rejected": -208.8103485107422, "loss": 0.2404, "rewards/accuracies": 1.0, "rewards/chosen": 0.4191213548183441, "rewards/margins": 1.6652395725250244, "rewards/rejected": -1.2461183071136475, "step": 7553 }, { "epoch": 0.87, "learning_rate": 3.9330445979164226e-08, "logits/chosen": -2.3640952110290527, "logits/rejected": -2.346482276916504, "logps/chosen": -120.52156829833984, "logps/rejected": -206.947998046875, "loss": 0.4608, "rewards/accuracies": 0.875, "rewards/chosen": -0.6944410800933838, "rewards/margins": 1.7416150569915771, "rewards/rejected": -2.43605637550354, "step": 7554 }, { "epoch": 0.87, "learning_rate": 3.929532950953997e-08, "logits/chosen": -3.842179775238037, "logits/rejected": -3.9236738681793213, "logps/chosen": -162.48321533203125, "logps/rejected": -280.4861755371094, "loss": 0.2041, "rewards/accuracies": 1.0, "rewards/chosen": 0.04581344500184059, "rewards/margins": 2.558535575866699, "rewards/rejected": -2.5127224922180176, "step": 7555 }, { "epoch": 0.87, "learning_rate": 3.926021303991572e-08, "logits/chosen": -2.7926151752471924, "logits/rejected": -2.5377161502838135, "logps/chosen": -257.3371276855469, "logps/rejected": -339.5315246582031, "loss": 0.1766, "rewards/accuracies": 1.0, "rewards/chosen": -0.3628157675266266, "rewards/margins": 2.4191765785217285, "rewards/rejected": -2.7819924354553223, "step": 7556 }, { "epoch": 0.87, "learning_rate": 3.922509657029146e-08, "logits/chosen": -3.9925270080566406, "logits/rejected": -4.044466972351074, "logps/chosen": -209.60350036621094, "logps/rejected": -239.74606323242188, "loss": 0.3202, "rewards/accuracies": 0.875, "rewards/chosen": -0.5925271511077881, "rewards/margins": 2.387394428253174, "rewards/rejected": -2.979921579360962, "step": 7557 }, { "epoch": 0.87, "learning_rate": 3.9189980100667215e-08, "logits/chosen": -2.9197874069213867, "logits/rejected": -2.8317458629608154, "logps/chosen": -258.7553405761719, "logps/rejected": -380.9085693359375, "loss": 0.2855, "rewards/accuracies": 0.875, "rewards/chosen": -0.08691045641899109, "rewards/margins": 2.3651578426361084, "rewards/rejected": -2.4520680904388428, "step": 7558 }, { "epoch": 0.87, "learning_rate": 3.9154863631042955e-08, "logits/chosen": -3.186643600463867, "logits/rejected": -3.0660080909729004, "logps/chosen": -307.81243896484375, "logps/rejected": -316.4818115234375, "loss": 0.1864, "rewards/accuracies": 0.875, "rewards/chosen": -0.2185508757829666, "rewards/margins": 2.727076768875122, "rewards/rejected": -2.9456276893615723, "step": 7559 }, { "epoch": 0.87, "learning_rate": 3.911974716141871e-08, "logits/chosen": -3.603273630142212, "logits/rejected": -3.1034016609191895, "logps/chosen": -219.39370727539062, "logps/rejected": -177.79420471191406, "loss": 0.4163, "rewards/accuracies": 0.75, "rewards/chosen": 0.4160585403442383, "rewards/margins": 1.7824475765228271, "rewards/rejected": -1.3663891553878784, "step": 7560 }, { "epoch": 0.87, "learning_rate": 3.908463069179445e-08, "logits/chosen": -3.111633777618408, "logits/rejected": -2.7252566814422607, "logps/chosen": -306.180908203125, "logps/rejected": -138.95387268066406, "loss": 0.3558, "rewards/accuracies": 0.875, "rewards/chosen": -0.09783172607421875, "rewards/margins": 1.4430997371673584, "rewards/rejected": -1.5409314632415771, "step": 7561 }, { "epoch": 0.87, "learning_rate": 3.90495142221702e-08, "logits/chosen": -4.150136947631836, "logits/rejected": -3.5667905807495117, "logps/chosen": -231.4912567138672, "logps/rejected": -138.6046142578125, "loss": 0.2631, "rewards/accuracies": 0.875, "rewards/chosen": -0.20945841073989868, "rewards/margins": 1.8365755081176758, "rewards/rejected": -2.0460338592529297, "step": 7562 }, { "epoch": 0.87, "learning_rate": 3.9014397752545944e-08, "logits/chosen": -2.8923513889312744, "logits/rejected": -2.9932138919830322, "logps/chosen": -242.42477416992188, "logps/rejected": -232.68936157226562, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": 0.4024260640144348, "rewards/margins": 2.592386245727539, "rewards/rejected": -2.189960241317749, "step": 7563 }, { "epoch": 0.87, "learning_rate": 3.897928128292169e-08, "logits/chosen": -3.0339345932006836, "logits/rejected": -2.7042338848114014, "logps/chosen": -338.98541259765625, "logps/rejected": -379.7603759765625, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": 0.06723837554454803, "rewards/margins": 2.9604616165161133, "rewards/rejected": -2.893223285675049, "step": 7564 }, { "epoch": 0.87, "learning_rate": 3.894416481329743e-08, "logits/chosen": -3.0354435443878174, "logits/rejected": -2.9515721797943115, "logps/chosen": -371.57928466796875, "logps/rejected": -256.2343444824219, "loss": 0.8974, "rewards/accuracies": 0.625, "rewards/chosen": -0.45775118470191956, "rewards/margins": 1.490973949432373, "rewards/rejected": -1.9487251043319702, "step": 7565 }, { "epoch": 0.87, "learning_rate": 3.8909048343673186e-08, "logits/chosen": -4.086225509643555, "logits/rejected": -3.9285669326782227, "logps/chosen": -410.01763916015625, "logps/rejected": -297.49639892578125, "loss": 0.3326, "rewards/accuracies": 0.875, "rewards/chosen": -0.5513190031051636, "rewards/margins": 2.1904683113098145, "rewards/rejected": -2.7417869567871094, "step": 7566 }, { "epoch": 0.87, "learning_rate": 3.8873931874048927e-08, "logits/chosen": -2.459522247314453, "logits/rejected": -2.658433675765991, "logps/chosen": -353.91546630859375, "logps/rejected": -315.06787109375, "loss": 0.635, "rewards/accuracies": 0.625, "rewards/chosen": -0.1849309802055359, "rewards/margins": 0.7666946053504944, "rewards/rejected": -0.9516257047653198, "step": 7567 }, { "epoch": 0.87, "learning_rate": 3.8838815404424674e-08, "logits/chosen": -3.2379226684570312, "logits/rejected": -3.097601890563965, "logps/chosen": -373.33892822265625, "logps/rejected": -292.4755554199219, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": 0.07962757349014282, "rewards/margins": 1.3106374740600586, "rewards/rejected": -1.2310099601745605, "step": 7568 }, { "epoch": 0.87, "learning_rate": 3.880369893480042e-08, "logits/chosen": -3.161777973175049, "logits/rejected": -3.441592216491699, "logps/chosen": -155.29885864257812, "logps/rejected": -253.94686889648438, "loss": 0.2253, "rewards/accuracies": 0.875, "rewards/chosen": 0.5492926836013794, "rewards/margins": 2.668752908706665, "rewards/rejected": -2.119460344314575, "step": 7569 }, { "epoch": 0.87, "learning_rate": 3.876858246517617e-08, "logits/chosen": -3.053536891937256, "logits/rejected": -3.3107030391693115, "logps/chosen": -193.4838409423828, "logps/rejected": -202.07131958007812, "loss": 0.2168, "rewards/accuracies": 0.875, "rewards/chosen": -0.23331770300865173, "rewards/margins": 2.748387336730957, "rewards/rejected": -2.9817051887512207, "step": 7570 }, { "epoch": 0.87, "learning_rate": 3.873346599555191e-08, "logits/chosen": -3.2025482654571533, "logits/rejected": -3.029066801071167, "logps/chosen": -371.6539001464844, "logps/rejected": -311.97564697265625, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": -0.08835163712501526, "rewards/margins": 3.016402006149292, "rewards/rejected": -3.1047537326812744, "step": 7571 }, { "epoch": 0.87, "learning_rate": 3.869834952592766e-08, "logits/chosen": -3.24029278755188, "logits/rejected": -3.1274194717407227, "logps/chosen": -269.87481689453125, "logps/rejected": -286.5357666015625, "loss": 0.2428, "rewards/accuracies": 1.0, "rewards/chosen": -0.5753228068351746, "rewards/margins": 2.4233646392822266, "rewards/rejected": -2.998687267303467, "step": 7572 }, { "epoch": 0.87, "learning_rate": 3.8663233056303403e-08, "logits/chosen": -2.3796653747558594, "logits/rejected": -2.275763511657715, "logps/chosen": -323.4744873046875, "logps/rejected": -242.3963623046875, "loss": 0.7146, "rewards/accuracies": 0.75, "rewards/chosen": -0.36813944578170776, "rewards/margins": 1.3301877975463867, "rewards/rejected": -1.6983273029327393, "step": 7573 }, { "epoch": 0.87, "learning_rate": 3.862811658667916e-08, "logits/chosen": -2.8156120777130127, "logits/rejected": -2.7153561115264893, "logps/chosen": -255.17005920410156, "logps/rejected": -279.2387390136719, "loss": 0.3094, "rewards/accuracies": 0.875, "rewards/chosen": 0.2533290684223175, "rewards/margins": 1.449665904045105, "rewards/rejected": -1.1963369846343994, "step": 7574 }, { "epoch": 0.87, "learning_rate": 3.85930001170549e-08, "logits/chosen": -2.6669154167175293, "logits/rejected": -2.656661033630371, "logps/chosen": -162.4687042236328, "logps/rejected": -215.538330078125, "loss": 0.2655, "rewards/accuracies": 1.0, "rewards/chosen": 0.03997776657342911, "rewards/margins": 1.9633609056472778, "rewards/rejected": -1.923383116722107, "step": 7575 }, { "epoch": 0.87, "learning_rate": 3.8557883647430645e-08, "logits/chosen": -3.3245697021484375, "logits/rejected": -3.808011531829834, "logps/chosen": -81.61366271972656, "logps/rejected": -234.8203887939453, "loss": 0.4729, "rewards/accuracies": 0.75, "rewards/chosen": -0.4588702619075775, "rewards/margins": 1.1712242364883423, "rewards/rejected": -1.6300945281982422, "step": 7576 }, { "epoch": 0.87, "learning_rate": 3.852276717780639e-08, "logits/chosen": -3.17464542388916, "logits/rejected": -3.3284831047058105, "logps/chosen": -291.9651794433594, "logps/rejected": -345.1766052246094, "loss": 0.2416, "rewards/accuracies": 1.0, "rewards/chosen": 0.0014204084873199463, "rewards/margins": 2.152723789215088, "rewards/rejected": -2.151303291320801, "step": 7577 }, { "epoch": 0.87, "learning_rate": 3.848765070818213e-08, "logits/chosen": -2.635668992996216, "logits/rejected": -2.9738638401031494, "logps/chosen": -480.68389892578125, "logps/rejected": -364.9833984375, "loss": 0.299, "rewards/accuracies": 1.0, "rewards/chosen": -0.2666846513748169, "rewards/margins": 1.2803875207901, "rewards/rejected": -1.5470722913742065, "step": 7578 }, { "epoch": 0.87, "learning_rate": 3.845253423855788e-08, "logits/chosen": -2.5546014308929443, "logits/rejected": -2.7725882530212402, "logps/chosen": -252.08096313476562, "logps/rejected": -322.1826171875, "loss": 0.3792, "rewards/accuracies": 0.875, "rewards/chosen": 0.00039318203926086426, "rewards/margins": 1.8391757011413574, "rewards/rejected": -1.838782548904419, "step": 7579 }, { "epoch": 0.87, "learning_rate": 3.841741776893363e-08, "logits/chosen": -2.9973411560058594, "logits/rejected": -2.77644944190979, "logps/chosen": -222.830322265625, "logps/rejected": -238.18362426757812, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": -0.2909637689590454, "rewards/margins": 1.4511042833328247, "rewards/rejected": -1.7420680522918701, "step": 7580 }, { "epoch": 0.87, "learning_rate": 3.8382301299309375e-08, "logits/chosen": -2.3000221252441406, "logits/rejected": -2.6768991947174072, "logps/chosen": -123.8421630859375, "logps/rejected": -216.54408264160156, "loss": 0.285, "rewards/accuracies": 1.0, "rewards/chosen": -0.2440255880355835, "rewards/margins": 1.600690245628357, "rewards/rejected": -1.8447158336639404, "step": 7581 }, { "epoch": 0.87, "learning_rate": 3.8347184829685115e-08, "logits/chosen": -3.424807548522949, "logits/rejected": -3.3455495834350586, "logps/chosen": -343.9700622558594, "logps/rejected": -202.4750213623047, "loss": 0.4227, "rewards/accuracies": 0.875, "rewards/chosen": -0.5537621974945068, "rewards/margins": 1.5196900367736816, "rewards/rejected": -2.0734522342681885, "step": 7582 }, { "epoch": 0.87, "learning_rate": 3.831206836006087e-08, "logits/chosen": -3.428264617919922, "logits/rejected": -3.482574462890625, "logps/chosen": -319.1234436035156, "logps/rejected": -322.68170166015625, "loss": 0.4837, "rewards/accuracies": 0.75, "rewards/chosen": 0.108221635222435, "rewards/margins": 1.870319128036499, "rewards/rejected": -1.7620974779129028, "step": 7583 }, { "epoch": 0.87, "learning_rate": 3.827695189043661e-08, "logits/chosen": -3.701219320297241, "logits/rejected": -3.929344415664673, "logps/chosen": -155.2252197265625, "logps/rejected": -202.30233764648438, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": 0.08364749699831009, "rewards/margins": 2.0329108238220215, "rewards/rejected": -1.9492634534835815, "step": 7584 }, { "epoch": 0.87, "learning_rate": 3.824183542081236e-08, "logits/chosen": -3.935368061065674, "logits/rejected": -3.663639783859253, "logps/chosen": -267.15283203125, "logps/rejected": -199.788330078125, "loss": 0.4969, "rewards/accuracies": 0.625, "rewards/chosen": -0.16736631095409393, "rewards/margins": 1.8326650857925415, "rewards/rejected": -2.0000314712524414, "step": 7585 }, { "epoch": 0.87, "learning_rate": 3.8206718951188104e-08, "logits/chosen": -3.275580406188965, "logits/rejected": -3.2906947135925293, "logps/chosen": -240.33999633789062, "logps/rejected": -149.70059204101562, "loss": 0.4238, "rewards/accuracies": 0.75, "rewards/chosen": -0.06640158593654633, "rewards/margins": 1.2977986335754395, "rewards/rejected": -1.3642001152038574, "step": 7586 }, { "epoch": 0.87, "learning_rate": 3.817160248156385e-08, "logits/chosen": -2.5057356357574463, "logits/rejected": -2.960049867630005, "logps/chosen": -398.403564453125, "logps/rejected": -229.0504150390625, "loss": 0.6163, "rewards/accuracies": 0.5, "rewards/chosen": -0.12105005979537964, "rewards/margins": 1.1103798151016235, "rewards/rejected": -1.231429934501648, "step": 7587 }, { "epoch": 0.87, "learning_rate": 3.813648601193959e-08, "logits/chosen": -2.437084674835205, "logits/rejected": -2.447450876235962, "logps/chosen": -282.7270202636719, "logps/rejected": -288.4638977050781, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": 0.12493991106748581, "rewards/margins": 2.42681622505188, "rewards/rejected": -2.3018765449523926, "step": 7588 }, { "epoch": 0.87, "learning_rate": 3.8101369542315346e-08, "logits/chosen": -3.584993362426758, "logits/rejected": -3.843242883682251, "logps/chosen": -182.67523193359375, "logps/rejected": -224.857177734375, "loss": 0.3774, "rewards/accuracies": 0.875, "rewards/chosen": -0.2901533842086792, "rewards/margins": 2.3085780143737793, "rewards/rejected": -2.598731517791748, "step": 7589 }, { "epoch": 0.87, "learning_rate": 3.8066253072691087e-08, "logits/chosen": -2.957814931869507, "logits/rejected": -2.907405376434326, "logps/chosen": -221.808837890625, "logps/rejected": -221.50595092773438, "loss": 0.6174, "rewards/accuracies": 0.625, "rewards/chosen": -0.2720318138599396, "rewards/margins": 1.164910078048706, "rewards/rejected": -1.4369418621063232, "step": 7590 }, { "epoch": 0.88, "learning_rate": 3.803113660306684e-08, "logits/chosen": -2.4351108074188232, "logits/rejected": -2.674781084060669, "logps/chosen": -235.7695770263672, "logps/rejected": -147.9312286376953, "loss": 0.4967, "rewards/accuracies": 0.75, "rewards/chosen": -0.21141257882118225, "rewards/margins": 1.2517539262771606, "rewards/rejected": -1.4631664752960205, "step": 7591 }, { "epoch": 0.88, "learning_rate": 3.799602013344258e-08, "logits/chosen": -2.7036170959472656, "logits/rejected": -2.686844825744629, "logps/chosen": -319.20123291015625, "logps/rejected": -152.3541259765625, "loss": 1.3517, "rewards/accuracies": 0.5, "rewards/chosen": -1.1489957571029663, "rewards/margins": -0.30185508728027344, "rewards/rejected": -0.8471405506134033, "step": 7592 }, { "epoch": 0.88, "learning_rate": 3.796090366381833e-08, "logits/chosen": -3.054304361343384, "logits/rejected": -3.220294952392578, "logps/chosen": -396.071044921875, "logps/rejected": -399.5506896972656, "loss": 0.2371, "rewards/accuracies": 1.0, "rewards/chosen": 0.2405589371919632, "rewards/margins": 1.6677930355072021, "rewards/rejected": -1.427234172821045, "step": 7593 }, { "epoch": 0.88, "learning_rate": 3.7925787194194075e-08, "logits/chosen": -2.644350528717041, "logits/rejected": -2.925177574157715, "logps/chosen": -378.0043640136719, "logps/rejected": -374.9029541015625, "loss": 0.9014, "rewards/accuracies": 0.625, "rewards/chosen": -0.3612551987171173, "rewards/margins": 0.03571963310241699, "rewards/rejected": -0.3969747722148895, "step": 7594 }, { "epoch": 0.88, "learning_rate": 3.789067072456982e-08, "logits/chosen": -3.098341941833496, "logits/rejected": -3.181070566177368, "logps/chosen": -311.84527587890625, "logps/rejected": -307.4010314941406, "loss": 0.3698, "rewards/accuracies": 0.875, "rewards/chosen": -0.37557315826416016, "rewards/margins": 1.9505267143249512, "rewards/rejected": -2.3261001110076904, "step": 7595 }, { "epoch": 0.88, "learning_rate": 3.7855554254945563e-08, "logits/chosen": -2.5498299598693848, "logits/rejected": -2.679361343383789, "logps/chosen": -349.10028076171875, "logps/rejected": -453.55096435546875, "loss": 0.1053, "rewards/accuracies": 1.0, "rewards/chosen": 0.7441035509109497, "rewards/margins": 3.977997303009033, "rewards/rejected": -3.233893632888794, "step": 7596 }, { "epoch": 0.88, "learning_rate": 3.782043778532132e-08, "logits/chosen": -3.1346843242645264, "logits/rejected": -3.606557846069336, "logps/chosen": -237.14112854003906, "logps/rejected": -385.85699462890625, "loss": 0.312, "rewards/accuracies": 0.75, "rewards/chosen": 0.8163938522338867, "rewards/margins": 1.717664361000061, "rewards/rejected": -0.90127032995224, "step": 7597 }, { "epoch": 0.88, "learning_rate": 3.778532131569706e-08, "logits/chosen": -3.0892677307128906, "logits/rejected": -2.6711299419403076, "logps/chosen": -288.348876953125, "logps/rejected": -311.7647705078125, "loss": 0.2924, "rewards/accuracies": 0.875, "rewards/chosen": -0.23230576515197754, "rewards/margins": 1.903717279434204, "rewards/rejected": -2.1360228061676025, "step": 7598 }, { "epoch": 0.88, "learning_rate": 3.775020484607281e-08, "logits/chosen": -2.796632766723633, "logits/rejected": -2.7059755325317383, "logps/chosen": -360.93011474609375, "logps/rejected": -440.08282470703125, "loss": 0.2664, "rewards/accuracies": 0.875, "rewards/chosen": -0.10496583580970764, "rewards/margins": 3.869910478591919, "rewards/rejected": -3.9748764038085938, "step": 7599 }, { "epoch": 0.88, "learning_rate": 3.771508837644855e-08, "logits/chosen": -3.407113552093506, "logits/rejected": -2.955613136291504, "logps/chosen": -444.0251770019531, "logps/rejected": -285.7989501953125, "loss": 0.4418, "rewards/accuracies": 0.875, "rewards/chosen": 0.4099489748477936, "rewards/margins": 1.5719212293624878, "rewards/rejected": -1.1619722843170166, "step": 7600 }, { "epoch": 0.88, "learning_rate": 3.76799719068243e-08, "logits/chosen": -2.7645602226257324, "logits/rejected": -2.7802939414978027, "logps/chosen": -413.627685546875, "logps/rejected": -250.46575927734375, "loss": 0.4125, "rewards/accuracies": 0.875, "rewards/chosen": -0.33420008420944214, "rewards/margins": 1.5374984741210938, "rewards/rejected": -1.8716986179351807, "step": 7601 }, { "epoch": 0.88, "learning_rate": 3.764485543720004e-08, "logits/chosen": -3.3080568313598633, "logits/rejected": -3.3107478618621826, "logps/chosen": -118.25067138671875, "logps/rejected": -268.0151062011719, "loss": 0.3948, "rewards/accuracies": 0.625, "rewards/chosen": 0.16873550415039062, "rewards/margins": 2.0823585987091064, "rewards/rejected": -1.9136230945587158, "step": 7602 }, { "epoch": 0.88, "learning_rate": 3.7609738967575794e-08, "logits/chosen": -3.1791627407073975, "logits/rejected": -2.57861065864563, "logps/chosen": -464.30419921875, "logps/rejected": -372.3885192871094, "loss": 0.5702, "rewards/accuracies": 0.625, "rewards/chosen": -0.9690279364585876, "rewards/margins": 1.6413335800170898, "rewards/rejected": -2.6103615760803223, "step": 7603 }, { "epoch": 0.88, "learning_rate": 3.7574622497951535e-08, "logits/chosen": -2.9340481758117676, "logits/rejected": -2.7933003902435303, "logps/chosen": -306.62030029296875, "logps/rejected": -263.33294677734375, "loss": 0.8562, "rewards/accuracies": 0.625, "rewards/chosen": -0.7862238883972168, "rewards/margins": 0.17646723985671997, "rewards/rejected": -0.9626911878585815, "step": 7604 }, { "epoch": 0.88, "learning_rate": 3.753950602832729e-08, "logits/chosen": -3.69677734375, "logits/rejected": -3.8206772804260254, "logps/chosen": -137.2847900390625, "logps/rejected": -176.9290771484375, "loss": 0.6402, "rewards/accuracies": 0.875, "rewards/chosen": -0.7195086479187012, "rewards/margins": 1.0663621425628662, "rewards/rejected": -1.7858707904815674, "step": 7605 }, { "epoch": 0.88, "learning_rate": 3.750438955870303e-08, "logits/chosen": -2.784344434738159, "logits/rejected": -2.7706947326660156, "logps/chosen": -165.04498291015625, "logps/rejected": -183.4173583984375, "loss": 0.8607, "rewards/accuracies": 0.5, "rewards/chosen": -0.2199465036392212, "rewards/margins": 0.13225476443767548, "rewards/rejected": -0.3522012531757355, "step": 7606 }, { "epoch": 0.88, "learning_rate": 3.7469273089078776e-08, "logits/chosen": -2.6172454357147217, "logits/rejected": -2.9715640544891357, "logps/chosen": -134.66146850585938, "logps/rejected": -111.86105346679688, "loss": 0.4465, "rewards/accuracies": 0.875, "rewards/chosen": -0.0568179190158844, "rewards/margins": 1.4647725820541382, "rewards/rejected": -1.5215904712677002, "step": 7607 }, { "epoch": 0.88, "learning_rate": 3.7434156619454524e-08, "logits/chosen": -2.736809492111206, "logits/rejected": -3.0143697261810303, "logps/chosen": -200.12159729003906, "logps/rejected": -273.3205871582031, "loss": 0.3598, "rewards/accuracies": 0.875, "rewards/chosen": 0.057573266327381134, "rewards/margins": 2.292672634124756, "rewards/rejected": -2.2350993156433105, "step": 7608 }, { "epoch": 0.88, "learning_rate": 3.7399040149830264e-08, "logits/chosen": -3.541539430618286, "logits/rejected": -3.480531930923462, "logps/chosen": -271.50872802734375, "logps/rejected": -173.95388793945312, "loss": 0.4854, "rewards/accuracies": 0.625, "rewards/chosen": 0.2878613770008087, "rewards/margins": 0.8165280222892761, "rewards/rejected": -0.5286666750907898, "step": 7609 }, { "epoch": 0.88, "learning_rate": 3.736392368020601e-08, "logits/chosen": -2.619772434234619, "logits/rejected": -2.6740944385528564, "logps/chosen": -223.5157470703125, "logps/rejected": -168.71484375, "loss": 0.5366, "rewards/accuracies": 0.75, "rewards/chosen": -0.18076805770397186, "rewards/margins": 0.9080381393432617, "rewards/rejected": -1.08880615234375, "step": 7610 }, { "epoch": 0.88, "learning_rate": 3.732880721058176e-08, "logits/chosen": -3.4996025562286377, "logits/rejected": -3.363617420196533, "logps/chosen": -289.63201904296875, "logps/rejected": -260.6901550292969, "loss": 0.4496, "rewards/accuracies": 0.75, "rewards/chosen": 0.014582201838493347, "rewards/margins": 1.2935256958007812, "rewards/rejected": -1.278943419456482, "step": 7611 }, { "epoch": 0.88, "learning_rate": 3.7293690740957506e-08, "logits/chosen": -3.1556713581085205, "logits/rejected": -3.169811725616455, "logps/chosen": -162.7407684326172, "logps/rejected": -106.66816711425781, "loss": 0.7193, "rewards/accuracies": 0.5, "rewards/chosen": -0.22439280152320862, "rewards/margins": 0.17966556549072266, "rewards/rejected": -0.4040583372116089, "step": 7612 }, { "epoch": 0.88, "learning_rate": 3.725857427133325e-08, "logits/chosen": -3.2529654502868652, "logits/rejected": -3.5669078826904297, "logps/chosen": -219.0803985595703, "logps/rejected": -178.70904541015625, "loss": 0.2699, "rewards/accuracies": 0.875, "rewards/chosen": 0.23684369027614594, "rewards/margins": 2.1413044929504395, "rewards/rejected": -1.9044607877731323, "step": 7613 }, { "epoch": 0.88, "learning_rate": 3.7223457801709e-08, "logits/chosen": -3.594912528991699, "logits/rejected": -3.6971917152404785, "logps/chosen": -153.599853515625, "logps/rejected": -310.6912536621094, "loss": 0.6387, "rewards/accuracies": 0.625, "rewards/chosen": -0.17991191148757935, "rewards/margins": 2.041037082672119, "rewards/rejected": -2.220949172973633, "step": 7614 }, { "epoch": 0.88, "learning_rate": 3.718834133208475e-08, "logits/chosen": -3.022644519805908, "logits/rejected": -2.781877279281616, "logps/chosen": -257.0694885253906, "logps/rejected": -268.97760009765625, "loss": 0.3084, "rewards/accuracies": 0.875, "rewards/chosen": -0.08323192596435547, "rewards/margins": 1.7235392332077026, "rewards/rejected": -1.8067710399627686, "step": 7615 }, { "epoch": 0.88, "learning_rate": 3.7153224862460495e-08, "logits/chosen": -2.7521159648895264, "logits/rejected": -2.734151840209961, "logps/chosen": -174.47068786621094, "logps/rejected": -238.9608154296875, "loss": 0.3379, "rewards/accuracies": 0.75, "rewards/chosen": 0.09667423367500305, "rewards/margins": 1.8496005535125732, "rewards/rejected": -1.7529263496398926, "step": 7616 }, { "epoch": 0.88, "learning_rate": 3.7118108392836235e-08, "logits/chosen": -2.5079736709594727, "logits/rejected": -2.9383625984191895, "logps/chosen": -197.1796875, "logps/rejected": -196.0464324951172, "loss": 0.3859, "rewards/accuracies": 0.75, "rewards/chosen": 0.09908384084701538, "rewards/margins": 2.128351926803589, "rewards/rejected": -2.0292680263519287, "step": 7617 }, { "epoch": 0.88, "learning_rate": 3.708299192321198e-08, "logits/chosen": -2.2869584560394287, "logits/rejected": -2.3304355144500732, "logps/chosen": -132.51458740234375, "logps/rejected": -211.7092742919922, "loss": 0.3483, "rewards/accuracies": 0.875, "rewards/chosen": -0.392471045255661, "rewards/margins": 1.4465982913970947, "rewards/rejected": -1.839069128036499, "step": 7618 }, { "epoch": 0.88, "learning_rate": 3.704787545358773e-08, "logits/chosen": -2.9152591228485107, "logits/rejected": -2.8448293209075928, "logps/chosen": -204.47763061523438, "logps/rejected": -201.3577880859375, "loss": 0.3134, "rewards/accuracies": 0.875, "rewards/chosen": 0.02714618667960167, "rewards/margins": 1.7272515296936035, "rewards/rejected": -1.7001054286956787, "step": 7619 }, { "epoch": 0.88, "learning_rate": 3.701275898396348e-08, "logits/chosen": -2.2620391845703125, "logits/rejected": -2.6744260787963867, "logps/chosen": -458.10479736328125, "logps/rejected": -211.65757751464844, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": 0.7492223381996155, "rewards/margins": 2.045100212097168, "rewards/rejected": -1.2958779335021973, "step": 7620 }, { "epoch": 0.88, "learning_rate": 3.6977642514339224e-08, "logits/chosen": -2.875605583190918, "logits/rejected": -2.869070529937744, "logps/chosen": -206.992431640625, "logps/rejected": -226.53341674804688, "loss": 0.5937, "rewards/accuracies": 0.625, "rewards/chosen": -0.605523943901062, "rewards/margins": 0.6339443325996399, "rewards/rejected": -1.2394683361053467, "step": 7621 }, { "epoch": 0.88, "learning_rate": 3.694252604471497e-08, "logits/chosen": -3.081188917160034, "logits/rejected": -3.0427112579345703, "logps/chosen": -285.08203125, "logps/rejected": -346.0024719238281, "loss": 0.21, "rewards/accuracies": 1.0, "rewards/chosen": 0.25914713740348816, "rewards/margins": 2.268519878387451, "rewards/rejected": -2.0093727111816406, "step": 7622 }, { "epoch": 0.88, "learning_rate": 3.690740957509072e-08, "logits/chosen": -3.2641797065734863, "logits/rejected": -3.328871488571167, "logps/chosen": -313.53192138671875, "logps/rejected": -280.05340576171875, "loss": 0.2644, "rewards/accuracies": 1.0, "rewards/chosen": -0.06294530630111694, "rewards/margins": 1.6746585369110107, "rewards/rejected": -1.7376036643981934, "step": 7623 }, { "epoch": 0.88, "learning_rate": 3.687229310546646e-08, "logits/chosen": -3.3563408851623535, "logits/rejected": -3.3440425395965576, "logps/chosen": -289.53363037109375, "logps/rejected": -203.98892211914062, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -0.4824787378311157, "rewards/margins": 2.3691229820251465, "rewards/rejected": -2.8516016006469727, "step": 7624 }, { "epoch": 0.88, "learning_rate": 3.683717663584221e-08, "logits/chosen": -3.2121639251708984, "logits/rejected": -3.3572726249694824, "logps/chosen": -218.2339630126953, "logps/rejected": -153.7510223388672, "loss": 0.2932, "rewards/accuracies": 0.875, "rewards/chosen": -0.14481651782989502, "rewards/margins": 2.0227060317993164, "rewards/rejected": -2.167522430419922, "step": 7625 }, { "epoch": 0.88, "learning_rate": 3.6802060166217954e-08, "logits/chosen": -2.7336974143981934, "logits/rejected": -2.971919298171997, "logps/chosen": -274.9068603515625, "logps/rejected": -358.610107421875, "loss": 0.6866, "rewards/accuracies": 0.5, "rewards/chosen": -0.15624390542507172, "rewards/margins": 0.8141628503799438, "rewards/rejected": -0.9704067707061768, "step": 7626 }, { "epoch": 0.88, "learning_rate": 3.67669436965937e-08, "logits/chosen": -2.7857155799865723, "logits/rejected": -2.771305799484253, "logps/chosen": -174.1531982421875, "logps/rejected": -138.58377075195312, "loss": 0.4414, "rewards/accuracies": 0.875, "rewards/chosen": -0.5070219039916992, "rewards/margins": 0.7728464603424072, "rewards/rejected": -1.2798683643341064, "step": 7627 }, { "epoch": 0.88, "learning_rate": 3.673182722696945e-08, "logits/chosen": -3.3698015213012695, "logits/rejected": -2.99528431892395, "logps/chosen": -293.1990051269531, "logps/rejected": -226.3348388671875, "loss": 0.4878, "rewards/accuracies": 0.75, "rewards/chosen": -0.21373769640922546, "rewards/margins": 1.865426778793335, "rewards/rejected": -2.079164505004883, "step": 7628 }, { "epoch": 0.88, "learning_rate": 3.6696710757345196e-08, "logits/chosen": -2.5424275398254395, "logits/rejected": -2.4286184310913086, "logps/chosen": -358.4206848144531, "logps/rejected": -345.48876953125, "loss": 0.3504, "rewards/accuracies": 1.0, "rewards/chosen": -0.5109260082244873, "rewards/margins": 1.3053237199783325, "rewards/rejected": -1.8162497282028198, "step": 7629 }, { "epoch": 0.88, "learning_rate": 3.666159428772094e-08, "logits/chosen": -2.4932146072387695, "logits/rejected": -2.403775691986084, "logps/chosen": -233.6435089111328, "logps/rejected": -218.89508056640625, "loss": 0.4124, "rewards/accuracies": 0.75, "rewards/chosen": -0.3113442063331604, "rewards/margins": 1.247554063796997, "rewards/rejected": -1.5588980913162231, "step": 7630 }, { "epoch": 0.88, "learning_rate": 3.6626477818096684e-08, "logits/chosen": -3.1626033782958984, "logits/rejected": -3.1247336864471436, "logps/chosen": -352.94781494140625, "logps/rejected": -310.75421142578125, "loss": 0.2941, "rewards/accuracies": 0.875, "rewards/chosen": 0.1739061176776886, "rewards/margins": 1.859586477279663, "rewards/rejected": -1.6856803894042969, "step": 7631 }, { "epoch": 0.88, "learning_rate": 3.659136134847243e-08, "logits/chosen": -3.2817039489746094, "logits/rejected": -3.4136295318603516, "logps/chosen": -206.05455017089844, "logps/rejected": -168.4808349609375, "loss": 0.342, "rewards/accuracies": 0.75, "rewards/chosen": 0.03652048483490944, "rewards/margins": 1.6448055505752563, "rewards/rejected": -1.6082849502563477, "step": 7632 }, { "epoch": 0.88, "learning_rate": 3.655624487884818e-08, "logits/chosen": -3.799051284790039, "logits/rejected": -3.178656816482544, "logps/chosen": -357.99871826171875, "logps/rejected": -162.61441040039062, "loss": 0.4786, "rewards/accuracies": 0.875, "rewards/chosen": -0.23966602981090546, "rewards/margins": 1.1368972063064575, "rewards/rejected": -1.376563310623169, "step": 7633 }, { "epoch": 0.88, "learning_rate": 3.6521128409223925e-08, "logits/chosen": -2.6555280685424805, "logits/rejected": -2.6765482425689697, "logps/chosen": -301.66339111328125, "logps/rejected": -309.6047058105469, "loss": 0.4045, "rewards/accuracies": 0.875, "rewards/chosen": -0.5260014533996582, "rewards/margins": 1.3505544662475586, "rewards/rejected": -1.8765559196472168, "step": 7634 }, { "epoch": 0.88, "learning_rate": 3.648601193959967e-08, "logits/chosen": -3.1763036251068115, "logits/rejected": -3.192868232727051, "logps/chosen": -355.01123046875, "logps/rejected": -241.92239379882812, "loss": 0.3658, "rewards/accuracies": 0.875, "rewards/chosen": 0.17121285200119019, "rewards/margins": 1.2839034795761108, "rewards/rejected": -1.1126905679702759, "step": 7635 }, { "epoch": 0.88, "learning_rate": 3.645089546997542e-08, "logits/chosen": -3.876673460006714, "logits/rejected": -3.7684497833251953, "logps/chosen": -229.6480712890625, "logps/rejected": -230.7838134765625, "loss": 0.2233, "rewards/accuracies": 1.0, "rewards/chosen": 0.27507105469703674, "rewards/margins": 2.1472644805908203, "rewards/rejected": -1.872193455696106, "step": 7636 }, { "epoch": 0.88, "learning_rate": 3.641577900035117e-08, "logits/chosen": -3.1206889152526855, "logits/rejected": -3.1701934337615967, "logps/chosen": -503.32061767578125, "logps/rejected": -327.8870849609375, "loss": 0.6893, "rewards/accuracies": 0.75, "rewards/chosen": -0.2947362959384918, "rewards/margins": 0.3740427494049072, "rewards/rejected": -0.6687790751457214, "step": 7637 }, { "epoch": 0.88, "learning_rate": 3.638066253072691e-08, "logits/chosen": -3.3647868633270264, "logits/rejected": -3.4685096740722656, "logps/chosen": -168.15115356445312, "logps/rejected": -170.8046875, "loss": 0.4434, "rewards/accuracies": 0.75, "rewards/chosen": -0.21816954016685486, "rewards/margins": 1.3796720504760742, "rewards/rejected": -1.5978416204452515, "step": 7638 }, { "epoch": 0.88, "learning_rate": 3.6345546061102655e-08, "logits/chosen": -3.338784694671631, "logits/rejected": -3.405172824859619, "logps/chosen": -149.04666137695312, "logps/rejected": -120.28192138671875, "loss": 0.4129, "rewards/accuracies": 0.875, "rewards/chosen": 0.12702493369579315, "rewards/margins": 1.815659999847412, "rewards/rejected": -1.688634991645813, "step": 7639 }, { "epoch": 0.88, "learning_rate": 3.63104295914784e-08, "logits/chosen": -2.7295384407043457, "logits/rejected": -2.7153818607330322, "logps/chosen": -237.40707397460938, "logps/rejected": -150.3667755126953, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": 0.6186214685440063, "rewards/margins": 2.6845149993896484, "rewards/rejected": -2.0658936500549316, "step": 7640 }, { "epoch": 0.88, "learning_rate": 3.627531312185415e-08, "logits/chosen": -2.750901699066162, "logits/rejected": -2.518700361251831, "logps/chosen": -482.34442138671875, "logps/rejected": -415.9822998046875, "loss": 0.4956, "rewards/accuracies": 0.75, "rewards/chosen": -0.2535858750343323, "rewards/margins": 1.9383552074432373, "rewards/rejected": -2.191941261291504, "step": 7641 }, { "epoch": 0.88, "learning_rate": 3.6240196652229897e-08, "logits/chosen": -2.933825969696045, "logits/rejected": -2.9700405597686768, "logps/chosen": -185.8231201171875, "logps/rejected": -218.3321990966797, "loss": 0.2981, "rewards/accuracies": 0.875, "rewards/chosen": 0.05574329197406769, "rewards/margins": 1.755366563796997, "rewards/rejected": -1.6996231079101562, "step": 7642 }, { "epoch": 0.88, "learning_rate": 3.6205080182605644e-08, "logits/chosen": -2.606166124343872, "logits/rejected": -2.5084586143493652, "logps/chosen": -388.26019287109375, "logps/rejected": -336.9754638671875, "loss": 0.5517, "rewards/accuracies": 0.75, "rewards/chosen": -0.17042848467826843, "rewards/margins": 1.6970205307006836, "rewards/rejected": -1.8674489259719849, "step": 7643 }, { "epoch": 0.88, "learning_rate": 3.616996371298139e-08, "logits/chosen": -3.3378026485443115, "logits/rejected": -3.2633228302001953, "logps/chosen": -506.8536376953125, "logps/rejected": -268.85943603515625, "loss": 0.9576, "rewards/accuracies": 0.5, "rewards/chosen": -0.619388222694397, "rewards/margins": 0.6217590570449829, "rewards/rejected": -1.2411472797393799, "step": 7644 }, { "epoch": 0.88, "learning_rate": 3.613484724335713e-08, "logits/chosen": -4.131060600280762, "logits/rejected": -4.020941734313965, "logps/chosen": -165.30917358398438, "logps/rejected": -176.06236267089844, "loss": 0.5372, "rewards/accuracies": 0.75, "rewards/chosen": -0.1151377260684967, "rewards/margins": 0.8534390926361084, "rewards/rejected": -0.9685768485069275, "step": 7645 }, { "epoch": 0.88, "learning_rate": 3.609973077373288e-08, "logits/chosen": -2.8859474658966064, "logits/rejected": -2.969754457473755, "logps/chosen": -331.26263427734375, "logps/rejected": -243.60330200195312, "loss": 0.5355, "rewards/accuracies": 0.75, "rewards/chosen": 0.06554195284843445, "rewards/margins": 1.0498462915420532, "rewards/rejected": -0.9843042492866516, "step": 7646 }, { "epoch": 0.88, "learning_rate": 3.6064614304108626e-08, "logits/chosen": -2.2014708518981934, "logits/rejected": -2.310743570327759, "logps/chosen": -154.4513702392578, "logps/rejected": -232.7261962890625, "loss": 0.385, "rewards/accuracies": 0.875, "rewards/chosen": 0.4110721945762634, "rewards/margins": 1.7894057035446167, "rewards/rejected": -1.3783334493637085, "step": 7647 }, { "epoch": 0.88, "learning_rate": 3.602949783448437e-08, "logits/chosen": -2.59954833984375, "logits/rejected": -2.5684280395507812, "logps/chosen": -610.4935913085938, "logps/rejected": -398.55609130859375, "loss": 0.2282, "rewards/accuracies": 0.875, "rewards/chosen": 0.5761045217514038, "rewards/margins": 2.4265780448913574, "rewards/rejected": -1.8504736423492432, "step": 7648 }, { "epoch": 0.88, "learning_rate": 3.5994381364860114e-08, "logits/chosen": -4.148447513580322, "logits/rejected": -4.053508281707764, "logps/chosen": -178.102294921875, "logps/rejected": -165.68148803710938, "loss": 0.3541, "rewards/accuracies": 0.75, "rewards/chosen": -0.22258982062339783, "rewards/margins": 2.0176453590393066, "rewards/rejected": -2.2402350902557373, "step": 7649 }, { "epoch": 0.88, "learning_rate": 3.595926489523586e-08, "logits/chosen": -3.127763509750366, "logits/rejected": -3.190213918685913, "logps/chosen": -203.63058471679688, "logps/rejected": -200.00582885742188, "loss": 0.3159, "rewards/accuracies": 0.875, "rewards/chosen": 0.2161267250776291, "rewards/margins": 3.2045531272888184, "rewards/rejected": -2.988426446914673, "step": 7650 }, { "epoch": 0.88, "learning_rate": 3.592414842561161e-08, "logits/chosen": -3.004331588745117, "logits/rejected": -3.208406686782837, "logps/chosen": -230.68695068359375, "logps/rejected": -287.5965881347656, "loss": 0.3823, "rewards/accuracies": 0.875, "rewards/chosen": 0.048517100512981415, "rewards/margins": 2.1586720943450928, "rewards/rejected": -2.110154867172241, "step": 7651 }, { "epoch": 0.88, "learning_rate": 3.5889031955987356e-08, "logits/chosen": -3.659144878387451, "logits/rejected": -3.471808433532715, "logps/chosen": -197.71914672851562, "logps/rejected": -227.12258911132812, "loss": 0.3044, "rewards/accuracies": 0.75, "rewards/chosen": 0.5203490853309631, "rewards/margins": 2.3945536613464355, "rewards/rejected": -1.8742048740386963, "step": 7652 }, { "epoch": 0.88, "learning_rate": 3.58539154863631e-08, "logits/chosen": -3.1716010570526123, "logits/rejected": -3.0795986652374268, "logps/chosen": -270.70849609375, "logps/rejected": -404.6064147949219, "loss": 0.1384, "rewards/accuracies": 1.0, "rewards/chosen": 0.008125629276037216, "rewards/margins": 2.973641872406006, "rewards/rejected": -2.9655165672302246, "step": 7653 }, { "epoch": 0.88, "learning_rate": 3.581879901673885e-08, "logits/chosen": -3.29257869720459, "logits/rejected": -2.8480136394500732, "logps/chosen": -195.81829833984375, "logps/rejected": -261.26654052734375, "loss": 0.3753, "rewards/accuracies": 0.625, "rewards/chosen": 0.2969158887863159, "rewards/margins": 1.8440520763397217, "rewards/rejected": -1.5471360683441162, "step": 7654 }, { "epoch": 0.88, "learning_rate": 3.578368254711459e-08, "logits/chosen": -2.788461208343506, "logits/rejected": -2.635265350341797, "logps/chosen": -268.42724609375, "logps/rejected": -266.407470703125, "loss": 0.3061, "rewards/accuracies": 0.75, "rewards/chosen": -0.04917724430561066, "rewards/margins": 2.7990050315856934, "rewards/rejected": -2.848182201385498, "step": 7655 }, { "epoch": 0.88, "learning_rate": 3.574856607749034e-08, "logits/chosen": -2.817654848098755, "logits/rejected": -3.0996322631835938, "logps/chosen": -245.56982421875, "logps/rejected": -233.31753540039062, "loss": 0.5579, "rewards/accuracies": 0.75, "rewards/chosen": 0.2723575234413147, "rewards/margins": 1.7695564031600952, "rewards/rejected": -1.4971989393234253, "step": 7656 }, { "epoch": 0.88, "learning_rate": 3.5713449607866085e-08, "logits/chosen": -3.146768093109131, "logits/rejected": -3.093116283416748, "logps/chosen": -163.6436767578125, "logps/rejected": -163.0171661376953, "loss": 0.6741, "rewards/accuracies": 0.625, "rewards/chosen": -0.163284033536911, "rewards/margins": 1.4121973514556885, "rewards/rejected": -1.5754815340042114, "step": 7657 }, { "epoch": 0.88, "learning_rate": 3.567833313824183e-08, "logits/chosen": -2.3784000873565674, "logits/rejected": -2.441859006881714, "logps/chosen": -194.76861572265625, "logps/rejected": -327.80328369140625, "loss": 0.2998, "rewards/accuracies": 0.75, "rewards/chosen": 0.15238747000694275, "rewards/margins": 3.249593734741211, "rewards/rejected": -3.0972063541412354, "step": 7658 }, { "epoch": 0.88, "learning_rate": 3.564321666861758e-08, "logits/chosen": -3.2394089698791504, "logits/rejected": -3.1864938735961914, "logps/chosen": -152.1769256591797, "logps/rejected": -134.38693237304688, "loss": 0.2922, "rewards/accuracies": 0.875, "rewards/chosen": 0.46608826518058777, "rewards/margins": 1.2863248586654663, "rewards/rejected": -0.8202365636825562, "step": 7659 }, { "epoch": 0.88, "learning_rate": 3.560810019899333e-08, "logits/chosen": -3.240140676498413, "logits/rejected": -3.018044948577881, "logps/chosen": -217.94944763183594, "logps/rejected": -315.2206115722656, "loss": 0.2427, "rewards/accuracies": 0.875, "rewards/chosen": 0.004435107111930847, "rewards/margins": 2.5326125621795654, "rewards/rejected": -2.5281777381896973, "step": 7660 }, { "epoch": 0.88, "learning_rate": 3.5572983729369074e-08, "logits/chosen": -3.0184788703918457, "logits/rejected": -3.1693787574768066, "logps/chosen": -370.0528869628906, "logps/rejected": -230.92337036132812, "loss": 0.2771, "rewards/accuracies": 0.75, "rewards/chosen": 0.6506674289703369, "rewards/margins": 2.628783941268921, "rewards/rejected": -1.978116512298584, "step": 7661 }, { "epoch": 0.88, "learning_rate": 3.5537867259744815e-08, "logits/chosen": -2.548046588897705, "logits/rejected": -2.775184154510498, "logps/chosen": -242.05828857421875, "logps/rejected": -223.63812255859375, "loss": 0.5132, "rewards/accuracies": 0.75, "rewards/chosen": 0.009656044654548168, "rewards/margins": 1.3965058326721191, "rewards/rejected": -1.3868498802185059, "step": 7662 }, { "epoch": 0.88, "learning_rate": 3.550275079012056e-08, "logits/chosen": -2.9915943145751953, "logits/rejected": -3.12161922454834, "logps/chosen": -228.07391357421875, "logps/rejected": -168.74832153320312, "loss": 0.523, "rewards/accuracies": 0.625, "rewards/chosen": -0.04166325926780701, "rewards/margins": 0.5615637302398682, "rewards/rejected": -0.6032271385192871, "step": 7663 }, { "epoch": 0.88, "learning_rate": 3.546763432049631e-08, "logits/chosen": -2.7289435863494873, "logits/rejected": -2.622244119644165, "logps/chosen": -227.98858642578125, "logps/rejected": -322.22003173828125, "loss": 0.6126, "rewards/accuracies": 0.875, "rewards/chosen": 0.15746495127677917, "rewards/margins": 1.462770700454712, "rewards/rejected": -1.3053056001663208, "step": 7664 }, { "epoch": 0.88, "learning_rate": 3.5432517850872057e-08, "logits/chosen": -3.438560724258423, "logits/rejected": -3.215437173843384, "logps/chosen": -166.12098693847656, "logps/rejected": -294.1054992675781, "loss": 0.5871, "rewards/accuracies": 0.75, "rewards/chosen": -0.9853675961494446, "rewards/margins": 0.8093535304069519, "rewards/rejected": -1.794721007347107, "step": 7665 }, { "epoch": 0.88, "learning_rate": 3.5397401381247804e-08, "logits/chosen": -2.976301670074463, "logits/rejected": -3.130638599395752, "logps/chosen": -363.5771789550781, "logps/rejected": -366.8040466308594, "loss": 0.1533, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144974112510681, "rewards/margins": 2.9902987480163574, "rewards/rejected": -2.6758012771606445, "step": 7666 }, { "epoch": 0.88, "learning_rate": 3.536228491162355e-08, "logits/chosen": -3.571976900100708, "logits/rejected": -3.6370620727539062, "logps/chosen": -258.1890869140625, "logps/rejected": -304.0965881347656, "loss": 0.423, "rewards/accuracies": 0.5, "rewards/chosen": -0.10948070883750916, "rewards/margins": 1.7805047035217285, "rewards/rejected": -1.8899853229522705, "step": 7667 }, { "epoch": 0.88, "learning_rate": 3.53271684419993e-08, "logits/chosen": -3.19193696975708, "logits/rejected": -3.133286952972412, "logps/chosen": -204.53427124023438, "logps/rejected": -147.36268615722656, "loss": 0.427, "rewards/accuracies": 0.75, "rewards/chosen": -0.029847070574760437, "rewards/margins": 1.1351768970489502, "rewards/rejected": -1.165023922920227, "step": 7668 }, { "epoch": 0.88, "learning_rate": 3.529205197237504e-08, "logits/chosen": -3.725642442703247, "logits/rejected": -3.502023935317993, "logps/chosen": -275.0013122558594, "logps/rejected": -263.19573974609375, "loss": 0.5994, "rewards/accuracies": 0.75, "rewards/chosen": -0.862059473991394, "rewards/margins": 0.8998991847038269, "rewards/rejected": -1.7619587182998657, "step": 7669 }, { "epoch": 0.88, "learning_rate": 3.5256935502750786e-08, "logits/chosen": -3.3947958946228027, "logits/rejected": -3.4377174377441406, "logps/chosen": -215.1112060546875, "logps/rejected": -291.4317626953125, "loss": 0.6478, "rewards/accuracies": 0.5, "rewards/chosen": -0.8465834856033325, "rewards/margins": 1.993584394454956, "rewards/rejected": -2.840167760848999, "step": 7670 }, { "epoch": 0.88, "learning_rate": 3.5221819033126533e-08, "logits/chosen": -3.039644956588745, "logits/rejected": -3.1727547645568848, "logps/chosen": -181.00599670410156, "logps/rejected": -321.14288330078125, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": 0.36281317472457886, "rewards/margins": 1.0977845191955566, "rewards/rejected": -0.7349714040756226, "step": 7671 }, { "epoch": 0.88, "learning_rate": 3.518670256350228e-08, "logits/chosen": -3.6386072635650635, "logits/rejected": -3.569944143295288, "logps/chosen": -257.1064453125, "logps/rejected": -128.11773681640625, "loss": 0.4543, "rewards/accuracies": 0.875, "rewards/chosen": -0.11363162100315094, "rewards/margins": 1.1544733047485352, "rewards/rejected": -1.268104910850525, "step": 7672 }, { "epoch": 0.88, "learning_rate": 3.515158609387803e-08, "logits/chosen": -3.0936381816864014, "logits/rejected": -3.1023905277252197, "logps/chosen": -428.01275634765625, "logps/rejected": -414.23046875, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -0.059980347752571106, "rewards/margins": 3.504026412963867, "rewards/rejected": -3.5640065670013428, "step": 7673 }, { "epoch": 0.88, "learning_rate": 3.5116469624253775e-08, "logits/chosen": -3.3131322860717773, "logits/rejected": -3.35331130027771, "logps/chosen": -161.24205017089844, "logps/rejected": -186.045166015625, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": -0.3432813286781311, "rewards/margins": 2.2728075981140137, "rewards/rejected": -2.6160888671875, "step": 7674 }, { "epoch": 0.88, "learning_rate": 3.508135315462952e-08, "logits/chosen": -3.2668557167053223, "logits/rejected": -3.5880250930786133, "logps/chosen": -187.70578002929688, "logps/rejected": -245.8788604736328, "loss": 0.2825, "rewards/accuracies": 0.875, "rewards/chosen": 0.255094051361084, "rewards/margins": 2.678262948989868, "rewards/rejected": -2.423168659210205, "step": 7675 }, { "epoch": 0.88, "learning_rate": 3.504623668500527e-08, "logits/chosen": -2.6756019592285156, "logits/rejected": -2.725548267364502, "logps/chosen": -154.49266052246094, "logps/rejected": -260.20599365234375, "loss": 0.2794, "rewards/accuracies": 1.0, "rewards/chosen": 0.13877670466899872, "rewards/margins": 1.4353692531585693, "rewards/rejected": -1.2965924739837646, "step": 7676 }, { "epoch": 0.89, "learning_rate": 3.501112021538101e-08, "logits/chosen": -4.036628723144531, "logits/rejected": -3.5835819244384766, "logps/chosen": -203.0177764892578, "logps/rejected": -171.12765502929688, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": 0.29513877630233765, "rewards/margins": 2.7936410903930664, "rewards/rejected": -2.498502254486084, "step": 7677 }, { "epoch": 0.89, "learning_rate": 3.497600374575676e-08, "logits/chosen": -3.1263158321380615, "logits/rejected": -3.221679925918579, "logps/chosen": -202.86936950683594, "logps/rejected": -291.65582275390625, "loss": 0.2576, "rewards/accuracies": 0.875, "rewards/chosen": -0.6727031469345093, "rewards/margins": 2.5051164627075195, "rewards/rejected": -3.1778194904327393, "step": 7678 }, { "epoch": 0.89, "learning_rate": 3.4940887276132505e-08, "logits/chosen": -3.058842182159424, "logits/rejected": -3.042689800262451, "logps/chosen": -203.01675415039062, "logps/rejected": -164.68716430664062, "loss": 0.4322, "rewards/accuracies": 0.625, "rewards/chosen": -0.16362354159355164, "rewards/margins": 1.509131669998169, "rewards/rejected": -1.6727551221847534, "step": 7679 }, { "epoch": 0.89, "learning_rate": 3.490577080650825e-08, "logits/chosen": -2.768169403076172, "logits/rejected": -2.804692506790161, "logps/chosen": -278.6617431640625, "logps/rejected": -233.3551025390625, "loss": 0.4326, "rewards/accuracies": 0.75, "rewards/chosen": -1.2474874258041382, "rewards/margins": 1.1916913986206055, "rewards/rejected": -2.439178943634033, "step": 7680 }, { "epoch": 0.89, "learning_rate": 3.4870654336884e-08, "logits/chosen": -2.9182651042938232, "logits/rejected": -3.090418577194214, "logps/chosen": -200.34136962890625, "logps/rejected": -155.178955078125, "loss": 0.2708, "rewards/accuracies": 1.0, "rewards/chosen": 0.0705111175775528, "rewards/margins": 1.7049411535263062, "rewards/rejected": -1.634429931640625, "step": 7681 }, { "epoch": 0.89, "learning_rate": 3.4835537867259746e-08, "logits/chosen": -3.857264995574951, "logits/rejected": -3.583944320678711, "logps/chosen": -435.1050109863281, "logps/rejected": -370.1891174316406, "loss": 0.3204, "rewards/accuracies": 0.75, "rewards/chosen": -0.35307058691978455, "rewards/margins": 2.765441656112671, "rewards/rejected": -3.1185121536254883, "step": 7682 }, { "epoch": 0.89, "learning_rate": 3.4800421397635494e-08, "logits/chosen": -2.8667235374450684, "logits/rejected": -3.0640289783477783, "logps/chosen": -190.1137237548828, "logps/rejected": -322.49176025390625, "loss": 0.2268, "rewards/accuracies": 1.0, "rewards/chosen": 0.3227255642414093, "rewards/margins": 2.0898518562316895, "rewards/rejected": -1.767126202583313, "step": 7683 }, { "epoch": 0.89, "learning_rate": 3.4765304928011234e-08, "logits/chosen": -3.5781478881835938, "logits/rejected": -3.4235925674438477, "logps/chosen": -277.4344177246094, "logps/rejected": -310.1117858886719, "loss": 0.3243, "rewards/accuracies": 0.875, "rewards/chosen": -0.421318918466568, "rewards/margins": 2.414865732192993, "rewards/rejected": -2.8361847400665283, "step": 7684 }, { "epoch": 0.89, "learning_rate": 3.473018845838698e-08, "logits/chosen": -2.9035568237304688, "logits/rejected": -2.4842984676361084, "logps/chosen": -273.3330383300781, "logps/rejected": -308.31298828125, "loss": 0.9169, "rewards/accuracies": 0.625, "rewards/chosen": -0.9008941054344177, "rewards/margins": -0.07152464985847473, "rewards/rejected": -0.8293695449829102, "step": 7685 }, { "epoch": 0.89, "learning_rate": 3.469507198876273e-08, "logits/chosen": -2.750377893447876, "logits/rejected": -2.6009764671325684, "logps/chosen": -220.80908203125, "logps/rejected": -245.847900390625, "loss": 0.7389, "rewards/accuracies": 0.5, "rewards/chosen": -0.4926799535751343, "rewards/margins": 0.658049464225769, "rewards/rejected": -1.1507294178009033, "step": 7686 }, { "epoch": 0.89, "learning_rate": 3.4659955519138476e-08, "logits/chosen": -3.151965379714966, "logits/rejected": -2.9210424423217773, "logps/chosen": -172.95755004882812, "logps/rejected": -165.51785278320312, "loss": 0.6492, "rewards/accuracies": 0.5, "rewards/chosen": -0.857921302318573, "rewards/margins": 0.6993834972381592, "rewards/rejected": -1.5573047399520874, "step": 7687 }, { "epoch": 0.89, "learning_rate": 3.462483904951422e-08, "logits/chosen": -3.8976378440856934, "logits/rejected": -4.207592964172363, "logps/chosen": -212.37823486328125, "logps/rejected": -242.76702880859375, "loss": 0.6889, "rewards/accuracies": 0.5, "rewards/chosen": -0.1867302656173706, "rewards/margins": 0.661041796207428, "rewards/rejected": -0.8477720022201538, "step": 7688 }, { "epoch": 0.89, "learning_rate": 3.458972257988997e-08, "logits/chosen": -2.5930685997009277, "logits/rejected": -2.50842022895813, "logps/chosen": -323.29888916015625, "logps/rejected": -330.74169921875, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": 0.1770612597465515, "rewards/margins": 1.6394681930541992, "rewards/rejected": -1.4624069929122925, "step": 7689 }, { "epoch": 0.89, "learning_rate": 3.455460611026572e-08, "logits/chosen": -2.9935970306396484, "logits/rejected": -3.141474962234497, "logps/chosen": -283.3449401855469, "logps/rejected": -254.9655303955078, "loss": 0.5054, "rewards/accuracies": 0.5, "rewards/chosen": 0.15016943216323853, "rewards/margins": 1.7433760166168213, "rewards/rejected": -1.593206763267517, "step": 7690 }, { "epoch": 0.89, "learning_rate": 3.451948964064146e-08, "logits/chosen": -3.860302209854126, "logits/rejected": -3.7988839149475098, "logps/chosen": -315.24615478515625, "logps/rejected": -312.4653015136719, "loss": 0.2754, "rewards/accuracies": 0.875, "rewards/chosen": 0.06173545867204666, "rewards/margins": 1.9881621599197388, "rewards/rejected": -1.926426649093628, "step": 7691 }, { "epoch": 0.89, "learning_rate": 3.4484373171017205e-08, "logits/chosen": -2.917102336883545, "logits/rejected": -2.9034225940704346, "logps/chosen": -252.02310180664062, "logps/rejected": -461.7735900878906, "loss": 0.4117, "rewards/accuracies": 0.875, "rewards/chosen": 0.4548414349555969, "rewards/margins": 2.1772797107696533, "rewards/rejected": -1.7224382162094116, "step": 7692 }, { "epoch": 0.89, "learning_rate": 3.444925670139295e-08, "logits/chosen": -3.5589683055877686, "logits/rejected": -3.5374221801757812, "logps/chosen": -262.6549072265625, "logps/rejected": -251.53465270996094, "loss": 0.2011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5966962575912476, "rewards/margins": 2.180335521697998, "rewards/rejected": -1.5836392641067505, "step": 7693 }, { "epoch": 0.89, "learning_rate": 3.44141402317687e-08, "logits/chosen": -3.2782812118530273, "logits/rejected": -3.133056163787842, "logps/chosen": -148.0978546142578, "logps/rejected": -141.48760986328125, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": 0.16870620846748352, "rewards/margins": 1.94362211227417, "rewards/rejected": -1.7749156951904297, "step": 7694 }, { "epoch": 0.89, "learning_rate": 3.437902376214445e-08, "logits/chosen": -2.9960389137268066, "logits/rejected": -2.5571725368499756, "logps/chosen": -365.5172119140625, "logps/rejected": -356.5035400390625, "loss": 0.5127, "rewards/accuracies": 0.75, "rewards/chosen": -0.22979021072387695, "rewards/margins": 1.2422047853469849, "rewards/rejected": -1.4719949960708618, "step": 7695 }, { "epoch": 0.89, "learning_rate": 3.434390729252019e-08, "logits/chosen": -3.0020368099212646, "logits/rejected": -3.1969475746154785, "logps/chosen": -377.54302978515625, "logps/rejected": -286.8892822265625, "loss": 0.9296, "rewards/accuracies": 0.375, "rewards/chosen": -0.5243821144104004, "rewards/margins": 0.2713039517402649, "rewards/rejected": -0.7956860661506653, "step": 7696 }, { "epoch": 0.89, "learning_rate": 3.4308790822895935e-08, "logits/chosen": -2.771672010421753, "logits/rejected": -3.262355089187622, "logps/chosen": -234.47991943359375, "logps/rejected": -348.1251220703125, "loss": 0.307, "rewards/accuracies": 0.875, "rewards/chosen": 0.3008919656276703, "rewards/margins": 3.379920482635498, "rewards/rejected": -3.079028844833374, "step": 7697 }, { "epoch": 0.89, "learning_rate": 3.427367435327168e-08, "logits/chosen": -2.8321290016174316, "logits/rejected": -2.996061086654663, "logps/chosen": -180.8538818359375, "logps/rejected": -217.2718505859375, "loss": 0.4374, "rewards/accuracies": 0.75, "rewards/chosen": -0.04240456223487854, "rewards/margins": 2.6338579654693604, "rewards/rejected": -2.676262617111206, "step": 7698 }, { "epoch": 0.89, "learning_rate": 3.423855788364743e-08, "logits/chosen": -3.767836570739746, "logits/rejected": -3.559028387069702, "logps/chosen": -412.3526611328125, "logps/rejected": -294.35540771484375, "loss": 0.2104, "rewards/accuracies": 0.875, "rewards/chosen": 0.114081472158432, "rewards/margins": 2.3415002822875977, "rewards/rejected": -2.227418899536133, "step": 7699 }, { "epoch": 0.89, "learning_rate": 3.420344141402318e-08, "logits/chosen": -3.1921725273132324, "logits/rejected": -3.4911887645721436, "logps/chosen": -294.6708984375, "logps/rejected": -408.4051208496094, "loss": 0.6008, "rewards/accuracies": 0.625, "rewards/chosen": -1.0305671691894531, "rewards/margins": 1.9120299816131592, "rewards/rejected": -2.9425971508026123, "step": 7700 }, { "epoch": 0.89, "learning_rate": 3.416832494439892e-08, "logits/chosen": -3.119098663330078, "logits/rejected": -2.9794468879699707, "logps/chosen": -217.44573974609375, "logps/rejected": -259.6300048828125, "loss": 0.156, "rewards/accuracies": 1.0, "rewards/chosen": 0.1777142435312271, "rewards/margins": 2.241927146911621, "rewards/rejected": -2.0642130374908447, "step": 7701 }, { "epoch": 0.89, "learning_rate": 3.4133208474774665e-08, "logits/chosen": -2.860567092895508, "logits/rejected": -2.8718223571777344, "logps/chosen": -472.2198791503906, "logps/rejected": -569.9971313476562, "loss": 0.5037, "rewards/accuracies": 0.625, "rewards/chosen": -0.3044637441635132, "rewards/margins": 1.4361369609832764, "rewards/rejected": -1.7406007051467896, "step": 7702 }, { "epoch": 0.89, "learning_rate": 3.409809200515041e-08, "logits/chosen": -3.4763262271881104, "logits/rejected": -3.383606433868408, "logps/chosen": -151.1147003173828, "logps/rejected": -144.41392517089844, "loss": 0.3205, "rewards/accuracies": 0.75, "rewards/chosen": -0.23783788084983826, "rewards/margins": 1.6689621210098267, "rewards/rejected": -1.9068000316619873, "step": 7703 }, { "epoch": 0.89, "learning_rate": 3.406297553552616e-08, "logits/chosen": -3.863996982574463, "logits/rejected": -3.3563127517700195, "logps/chosen": -243.33673095703125, "logps/rejected": -205.6649169921875, "loss": 0.4968, "rewards/accuracies": 0.625, "rewards/chosen": -0.9303748607635498, "rewards/margins": 1.1627581119537354, "rewards/rejected": -2.093132734298706, "step": 7704 }, { "epoch": 0.89, "learning_rate": 3.4027859065901906e-08, "logits/chosen": -1.9601452350616455, "logits/rejected": -2.0156188011169434, "logps/chosen": -223.96942138671875, "logps/rejected": -189.8973388671875, "loss": 0.4333, "rewards/accuracies": 0.875, "rewards/chosen": 0.19601355493068695, "rewards/margins": 1.0412909984588623, "rewards/rejected": -0.8452774286270142, "step": 7705 }, { "epoch": 0.89, "learning_rate": 3.3992742596277654e-08, "logits/chosen": -3.083670139312744, "logits/rejected": -2.813535451889038, "logps/chosen": -235.2729949951172, "logps/rejected": -249.4950714111328, "loss": 0.1618, "rewards/accuracies": 1.0, "rewards/chosen": 0.42884957790374756, "rewards/margins": 2.9381141662597656, "rewards/rejected": -2.5092647075653076, "step": 7706 }, { "epoch": 0.89, "learning_rate": 3.39576261266534e-08, "logits/chosen": -2.3362138271331787, "logits/rejected": -2.835983991622925, "logps/chosen": -335.38690185546875, "logps/rejected": -266.859130859375, "loss": 0.6267, "rewards/accuracies": 0.75, "rewards/chosen": -0.9145666360855103, "rewards/margins": 0.516122579574585, "rewards/rejected": -1.4306893348693848, "step": 7707 }, { "epoch": 0.89, "learning_rate": 3.392250965702914e-08, "logits/chosen": -2.9312806129455566, "logits/rejected": -2.785050392150879, "logps/chosen": -255.94207763671875, "logps/rejected": -373.8504638671875, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": 0.28805339336395264, "rewards/margins": 3.512237787246704, "rewards/rejected": -3.224184513092041, "step": 7708 }, { "epoch": 0.89, "learning_rate": 3.388739318740489e-08, "logits/chosen": -3.373201847076416, "logits/rejected": -3.2293548583984375, "logps/chosen": -286.82916259765625, "logps/rejected": -246.7964324951172, "loss": 0.3442, "rewards/accuracies": 0.875, "rewards/chosen": -0.6285721063613892, "rewards/margins": 1.687580943107605, "rewards/rejected": -2.316153049468994, "step": 7709 }, { "epoch": 0.89, "learning_rate": 3.3852276717780636e-08, "logits/chosen": -3.210542678833008, "logits/rejected": -3.318963050842285, "logps/chosen": -136.4557647705078, "logps/rejected": -158.8125762939453, "loss": 0.7626, "rewards/accuracies": 0.375, "rewards/chosen": -0.5925025939941406, "rewards/margins": 1.3613091707229614, "rewards/rejected": -1.9538116455078125, "step": 7710 }, { "epoch": 0.89, "learning_rate": 3.381716024815638e-08, "logits/chosen": -2.4188497066497803, "logits/rejected": -2.806673765182495, "logps/chosen": -330.8678283691406, "logps/rejected": -233.90020751953125, "loss": 0.4428, "rewards/accuracies": 0.625, "rewards/chosen": -0.4553423523902893, "rewards/margins": 1.1576119661331177, "rewards/rejected": -1.6129543781280518, "step": 7711 }, { "epoch": 0.89, "learning_rate": 3.378204377853213e-08, "logits/chosen": -2.8287100791931152, "logits/rejected": -2.4348599910736084, "logps/chosen": -313.26263427734375, "logps/rejected": -298.743896484375, "loss": 0.2984, "rewards/accuracies": 0.875, "rewards/chosen": -0.34500542283058167, "rewards/margins": 1.7418721914291382, "rewards/rejected": -2.0868775844573975, "step": 7712 }, { "epoch": 0.89, "learning_rate": 3.374692730890788e-08, "logits/chosen": -3.153202533721924, "logits/rejected": -3.0617928504943848, "logps/chosen": -344.0357666015625, "logps/rejected": -366.5128479003906, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": -0.05073484778404236, "rewards/margins": 3.105733633041382, "rewards/rejected": -3.156468391418457, "step": 7713 }, { "epoch": 0.89, "learning_rate": 3.3711810839283625e-08, "logits/chosen": -3.1959409713745117, "logits/rejected": -2.352337121963501, "logps/chosen": -213.7987823486328, "logps/rejected": -178.318603515625, "loss": 0.2848, "rewards/accuracies": 1.0, "rewards/chosen": 0.016226936131715775, "rewards/margins": 1.5404002666473389, "rewards/rejected": -1.5241732597351074, "step": 7714 }, { "epoch": 0.89, "learning_rate": 3.3676694369659365e-08, "logits/chosen": -3.040203809738159, "logits/rejected": -3.0771360397338867, "logps/chosen": -304.8036804199219, "logps/rejected": -257.4837646484375, "loss": 0.2657, "rewards/accuracies": 0.875, "rewards/chosen": 0.6961950659751892, "rewards/margins": 1.9363155364990234, "rewards/rejected": -1.240120530128479, "step": 7715 }, { "epoch": 0.89, "learning_rate": 3.364157790003511e-08, "logits/chosen": -3.3186709880828857, "logits/rejected": -3.1841440200805664, "logps/chosen": -177.72259521484375, "logps/rejected": -185.0321807861328, "loss": 0.5347, "rewards/accuracies": 0.75, "rewards/chosen": 0.40844160318374634, "rewards/margins": 1.7803547382354736, "rewards/rejected": -1.371912956237793, "step": 7716 }, { "epoch": 0.89, "learning_rate": 3.360646143041086e-08, "logits/chosen": -3.1322526931762695, "logits/rejected": -2.8662362098693848, "logps/chosen": -308.20556640625, "logps/rejected": -240.45701599121094, "loss": 0.2534, "rewards/accuracies": 1.0, "rewards/chosen": 0.06625162065029144, "rewards/margins": 1.884826898574829, "rewards/rejected": -1.818575143814087, "step": 7717 }, { "epoch": 0.89, "learning_rate": 3.357134496078661e-08, "logits/chosen": -3.414032459259033, "logits/rejected": -3.501295328140259, "logps/chosen": -332.8255310058594, "logps/rejected": -240.69912719726562, "loss": 0.4712, "rewards/accuracies": 0.75, "rewards/chosen": -0.46190088987350464, "rewards/margins": 1.2377114295959473, "rewards/rejected": -1.6996124982833862, "step": 7718 }, { "epoch": 0.89, "learning_rate": 3.3536228491162354e-08, "logits/chosen": -3.0624935626983643, "logits/rejected": -2.562715768814087, "logps/chosen": -390.570556640625, "logps/rejected": -290.3553771972656, "loss": 0.2371, "rewards/accuracies": 0.875, "rewards/chosen": 0.2983400523662567, "rewards/margins": 2.422008991241455, "rewards/rejected": -2.123668670654297, "step": 7719 }, { "epoch": 0.89, "learning_rate": 3.35011120215381e-08, "logits/chosen": -2.645045042037964, "logits/rejected": -2.576686143875122, "logps/chosen": -155.7657928466797, "logps/rejected": -185.26316833496094, "loss": 1.2071, "rewards/accuracies": 0.125, "rewards/chosen": -1.1706604957580566, "rewards/margins": -0.7531743049621582, "rewards/rejected": -0.4174861013889313, "step": 7720 }, { "epoch": 0.89, "learning_rate": 3.346599555191385e-08, "logits/chosen": -3.4267921447753906, "logits/rejected": -3.5135669708251953, "logps/chosen": -346.3662414550781, "logps/rejected": -291.49468994140625, "loss": 0.367, "rewards/accuracies": 0.75, "rewards/chosen": -0.11909550428390503, "rewards/margins": 1.9072282314300537, "rewards/rejected": -2.0263237953186035, "step": 7721 }, { "epoch": 0.89, "learning_rate": 3.343087908228959e-08, "logits/chosen": -3.817856550216675, "logits/rejected": -3.513181447982788, "logps/chosen": -149.0838623046875, "logps/rejected": -104.24884033203125, "loss": 0.6428, "rewards/accuracies": 0.625, "rewards/chosen": -0.506304919719696, "rewards/margins": 1.18089759349823, "rewards/rejected": -1.6872024536132812, "step": 7722 }, { "epoch": 0.89, "learning_rate": 3.339576261266534e-08, "logits/chosen": -2.756549119949341, "logits/rejected": -2.644789695739746, "logps/chosen": -179.9594268798828, "logps/rejected": -264.8524475097656, "loss": 0.395, "rewards/accuracies": 0.75, "rewards/chosen": -0.3373478055000305, "rewards/margins": 1.2538487911224365, "rewards/rejected": -1.5911965370178223, "step": 7723 }, { "epoch": 0.89, "learning_rate": 3.3360646143041084e-08, "logits/chosen": -2.7818658351898193, "logits/rejected": -2.5825419425964355, "logps/chosen": -427.6488342285156, "logps/rejected": -282.6341857910156, "loss": 0.4357, "rewards/accuracies": 0.75, "rewards/chosen": 0.03854234516620636, "rewards/margins": 1.1265050172805786, "rewards/rejected": -1.0879626274108887, "step": 7724 }, { "epoch": 0.89, "learning_rate": 3.332552967341683e-08, "logits/chosen": -3.342698097229004, "logits/rejected": -3.284973621368408, "logps/chosen": -112.23065185546875, "logps/rejected": -171.39584350585938, "loss": 0.5206, "rewards/accuracies": 0.625, "rewards/chosen": -0.17175208032131195, "rewards/margins": 1.4090499877929688, "rewards/rejected": -1.5808022022247314, "step": 7725 }, { "epoch": 0.89, "learning_rate": 3.329041320379258e-08, "logits/chosen": -3.694077730178833, "logits/rejected": -4.031009674072266, "logps/chosen": -150.16653442382812, "logps/rejected": -342.7575988769531, "loss": 0.268, "rewards/accuracies": 0.75, "rewards/chosen": -0.2047451138496399, "rewards/margins": 4.190674304962158, "rewards/rejected": -4.395419120788574, "step": 7726 }, { "epoch": 0.89, "learning_rate": 3.3255296734168326e-08, "logits/chosen": -2.6517434120178223, "logits/rejected": -3.2167017459869385, "logps/chosen": -286.5120849609375, "logps/rejected": -224.70755004882812, "loss": 0.2669, "rewards/accuracies": 1.0, "rewards/chosen": -0.10657999664545059, "rewards/margins": 1.6953599452972412, "rewards/rejected": -1.801939845085144, "step": 7727 }, { "epoch": 0.89, "learning_rate": 3.322018026454407e-08, "logits/chosen": -3.2978157997131348, "logits/rejected": -3.5656328201293945, "logps/chosen": -198.2893524169922, "logps/rejected": -272.68560791015625, "loss": 0.5613, "rewards/accuracies": 0.875, "rewards/chosen": -0.15021464228630066, "rewards/margins": 0.7418673038482666, "rewards/rejected": -0.8920819163322449, "step": 7728 }, { "epoch": 0.89, "learning_rate": 3.3185063794919814e-08, "logits/chosen": -3.5045554637908936, "logits/rejected": -3.215005874633789, "logps/chosen": -119.87413787841797, "logps/rejected": -155.06973266601562, "loss": 0.5794, "rewards/accuracies": 0.75, "rewards/chosen": 0.06424407660961151, "rewards/margins": 1.2437567710876465, "rewards/rejected": -1.179512619972229, "step": 7729 }, { "epoch": 0.89, "learning_rate": 3.314994732529556e-08, "logits/chosen": -3.4658117294311523, "logits/rejected": -3.2018866539001465, "logps/chosen": -171.7327880859375, "logps/rejected": -150.44287109375, "loss": 0.397, "rewards/accuracies": 0.75, "rewards/chosen": 0.030296653509140015, "rewards/margins": 1.606359839439392, "rewards/rejected": -1.5760631561279297, "step": 7730 }, { "epoch": 0.89, "learning_rate": 3.311483085567131e-08, "logits/chosen": -2.007868766784668, "logits/rejected": -1.9814270734786987, "logps/chosen": -481.23516845703125, "logps/rejected": -420.2603759765625, "loss": 0.6195, "rewards/accuracies": 0.625, "rewards/chosen": -0.9627728462219238, "rewards/margins": 0.4949665069580078, "rewards/rejected": -1.4577393531799316, "step": 7731 }, { "epoch": 0.89, "learning_rate": 3.3079714386047055e-08, "logits/chosen": -2.501795768737793, "logits/rejected": -2.6003057956695557, "logps/chosen": -256.10491943359375, "logps/rejected": -201.0556640625, "loss": 0.3907, "rewards/accuracies": 0.875, "rewards/chosen": -0.48919951915740967, "rewards/margins": 1.5641649961471558, "rewards/rejected": -2.0533645153045654, "step": 7732 }, { "epoch": 0.89, "learning_rate": 3.30445979164228e-08, "logits/chosen": -3.5034420490264893, "logits/rejected": -3.714585065841675, "logps/chosen": -191.09793090820312, "logps/rejected": -233.52337646484375, "loss": 0.4621, "rewards/accuracies": 0.625, "rewards/chosen": -0.3328261971473694, "rewards/margins": 2.3278915882110596, "rewards/rejected": -2.660717725753784, "step": 7733 }, { "epoch": 0.89, "learning_rate": 3.300948144679855e-08, "logits/chosen": -2.7627954483032227, "logits/rejected": -2.987563371658325, "logps/chosen": -533.5601806640625, "logps/rejected": -204.4390869140625, "loss": 0.6347, "rewards/accuracies": 0.75, "rewards/chosen": 0.02906123921275139, "rewards/margins": 0.45422714948654175, "rewards/rejected": -0.42516595125198364, "step": 7734 }, { "epoch": 0.89, "learning_rate": 3.29743649771743e-08, "logits/chosen": -2.655792236328125, "logits/rejected": -2.351867914199829, "logps/chosen": -373.5713195800781, "logps/rejected": -277.4924011230469, "loss": 0.3761, "rewards/accuracies": 0.75, "rewards/chosen": -0.06853579729795456, "rewards/margins": 1.9429473876953125, "rewards/rejected": -2.0114831924438477, "step": 7735 }, { "epoch": 0.89, "learning_rate": 3.2939248507550044e-08, "logits/chosen": -3.1493935585021973, "logits/rejected": -3.3003406524658203, "logps/chosen": -135.2838897705078, "logps/rejected": -248.7139892578125, "loss": 0.3609, "rewards/accuracies": 0.625, "rewards/chosen": 0.2205415964126587, "rewards/margins": 3.1494359970092773, "rewards/rejected": -2.92889404296875, "step": 7736 }, { "epoch": 0.89, "learning_rate": 3.2904132037925785e-08, "logits/chosen": -3.2338480949401855, "logits/rejected": -3.3445420265197754, "logps/chosen": -296.6545104980469, "logps/rejected": -322.02734375, "loss": 0.5596, "rewards/accuracies": 0.75, "rewards/chosen": 0.18018876016139984, "rewards/margins": 2.422504425048828, "rewards/rejected": -2.2423157691955566, "step": 7737 }, { "epoch": 0.89, "learning_rate": 3.286901556830153e-08, "logits/chosen": -3.0029428005218506, "logits/rejected": -3.278355598449707, "logps/chosen": -186.03042602539062, "logps/rejected": -133.41319274902344, "loss": 0.4302, "rewards/accuracies": 0.75, "rewards/chosen": -0.35790660977363586, "rewards/margins": 0.9843102693557739, "rewards/rejected": -1.3422167301177979, "step": 7738 }, { "epoch": 0.89, "learning_rate": 3.283389909867728e-08, "logits/chosen": -3.8233304023742676, "logits/rejected": -3.791384220123291, "logps/chosen": -210.23963928222656, "logps/rejected": -273.9632568359375, "loss": 0.2483, "rewards/accuracies": 0.875, "rewards/chosen": -0.4137992262840271, "rewards/margins": 2.452509641647339, "rewards/rejected": -2.8663089275360107, "step": 7739 }, { "epoch": 0.89, "learning_rate": 3.2798782629053027e-08, "logits/chosen": -3.4914257526397705, "logits/rejected": -3.6795010566711426, "logps/chosen": -254.245361328125, "logps/rejected": -258.84564208984375, "loss": 0.241, "rewards/accuracies": 0.875, "rewards/chosen": -0.3799264430999756, "rewards/margins": 2.4329731464385986, "rewards/rejected": -2.812899589538574, "step": 7740 }, { "epoch": 0.89, "learning_rate": 3.2763666159428774e-08, "logits/chosen": -3.259256601333618, "logits/rejected": -3.0790326595306396, "logps/chosen": -450.0965576171875, "logps/rejected": -168.41812133789062, "loss": 0.6268, "rewards/accuracies": 0.5, "rewards/chosen": -0.5453364849090576, "rewards/margins": 0.955579936504364, "rewards/rejected": -1.5009163618087769, "step": 7741 }, { "epoch": 0.89, "learning_rate": 3.272854968980452e-08, "logits/chosen": -3.0606257915496826, "logits/rejected": -3.109020709991455, "logps/chosen": -198.93539428710938, "logps/rejected": -217.19345092773438, "loss": 0.2347, "rewards/accuracies": 0.875, "rewards/chosen": 0.47578954696655273, "rewards/margins": 2.02500581741333, "rewards/rejected": -1.5492162704467773, "step": 7742 }, { "epoch": 0.89, "learning_rate": 3.269343322018027e-08, "logits/chosen": -3.003836154937744, "logits/rejected": -2.9604852199554443, "logps/chosen": -172.08767700195312, "logps/rejected": -113.27359008789062, "loss": 1.2276, "rewards/accuracies": 0.5, "rewards/chosen": -1.2401279211044312, "rewards/margins": -0.5364943742752075, "rewards/rejected": -0.7036334276199341, "step": 7743 }, { "epoch": 0.89, "learning_rate": 3.265831675055601e-08, "logits/chosen": -3.250786542892456, "logits/rejected": -3.308811902999878, "logps/chosen": -212.99813842773438, "logps/rejected": -182.38314819335938, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.021382782608270645, "rewards/margins": 1.591247320175171, "rewards/rejected": -1.612630009651184, "step": 7744 }, { "epoch": 0.89, "learning_rate": 3.2623200280931756e-08, "logits/chosen": -2.9065022468566895, "logits/rejected": -2.874143123626709, "logps/chosen": -293.54095458984375, "logps/rejected": -268.6265869140625, "loss": 0.2529, "rewards/accuracies": 0.875, "rewards/chosen": 0.2599712908267975, "rewards/margins": 1.8759708404541016, "rewards/rejected": -1.615999460220337, "step": 7745 }, { "epoch": 0.89, "learning_rate": 3.25880838113075e-08, "logits/chosen": -2.663416862487793, "logits/rejected": -2.8530025482177734, "logps/chosen": -336.1132507324219, "logps/rejected": -329.21356201171875, "loss": 0.2386, "rewards/accuracies": 0.875, "rewards/chosen": -0.17912523448467255, "rewards/margins": 1.881790280342102, "rewards/rejected": -2.060915470123291, "step": 7746 }, { "epoch": 0.89, "learning_rate": 3.2552967341683244e-08, "logits/chosen": -3.1978254318237305, "logits/rejected": -3.250913619995117, "logps/chosen": -175.27389526367188, "logps/rejected": -261.72235107421875, "loss": 0.4642, "rewards/accuracies": 0.75, "rewards/chosen": -0.4060221016407013, "rewards/margins": 1.4670917987823486, "rewards/rejected": -1.8731138706207275, "step": 7747 }, { "epoch": 0.89, "learning_rate": 3.251785087205899e-08, "logits/chosen": -3.715944766998291, "logits/rejected": -3.562786340713501, "logps/chosen": -213.31846618652344, "logps/rejected": -180.7615203857422, "loss": 0.2309, "rewards/accuracies": 0.875, "rewards/chosen": -0.025960978120565414, "rewards/margins": 1.9359104633331299, "rewards/rejected": -1.9618713855743408, "step": 7748 }, { "epoch": 0.89, "learning_rate": 3.248273440243474e-08, "logits/chosen": -2.2066471576690674, "logits/rejected": -2.1416239738464355, "logps/chosen": -447.7305908203125, "logps/rejected": -240.54151916503906, "loss": 0.2902, "rewards/accuracies": 1.0, "rewards/chosen": 0.14992377161979675, "rewards/margins": 1.9970163106918335, "rewards/rejected": -1.8470923900604248, "step": 7749 }, { "epoch": 0.89, "learning_rate": 3.2447617932810486e-08, "logits/chosen": -3.320030927658081, "logits/rejected": -3.197328805923462, "logps/chosen": -233.27261352539062, "logps/rejected": -207.77139282226562, "loss": 0.4448, "rewards/accuracies": 0.875, "rewards/chosen": -0.2951366901397705, "rewards/margins": 1.2184524536132812, "rewards/rejected": -1.5135891437530518, "step": 7750 }, { "epoch": 0.89, "learning_rate": 3.241250146318623e-08, "logits/chosen": -2.609611749649048, "logits/rejected": -2.888796329498291, "logps/chosen": -276.3360595703125, "logps/rejected": -261.4114990234375, "loss": 0.5219, "rewards/accuracies": 0.75, "rewards/chosen": 0.1111648827791214, "rewards/margins": 1.113781213760376, "rewards/rejected": -1.0026164054870605, "step": 7751 }, { "epoch": 0.89, "learning_rate": 3.237738499356198e-08, "logits/chosen": -3.259030818939209, "logits/rejected": -3.3541276454925537, "logps/chosen": -308.83740234375, "logps/rejected": -252.72882080078125, "loss": 0.1962, "rewards/accuracies": 1.0, "rewards/chosen": 0.4137037992477417, "rewards/margins": 2.6914455890655518, "rewards/rejected": -2.2777419090270996, "step": 7752 }, { "epoch": 0.89, "learning_rate": 3.234226852393773e-08, "logits/chosen": -2.74841570854187, "logits/rejected": -2.617208957672119, "logps/chosen": -341.6676025390625, "logps/rejected": -391.48089599609375, "loss": 0.3499, "rewards/accuracies": 0.875, "rewards/chosen": 0.24446898698806763, "rewards/margins": 1.6166731119155884, "rewards/rejected": -1.372204065322876, "step": 7753 }, { "epoch": 0.89, "learning_rate": 3.230715205431347e-08, "logits/chosen": -3.451307773590088, "logits/rejected": -3.385662794113159, "logps/chosen": -117.44454193115234, "logps/rejected": -143.98745727539062, "loss": 0.3658, "rewards/accuracies": 0.75, "rewards/chosen": 0.11785197257995605, "rewards/margins": 1.8357584476470947, "rewards/rejected": -1.7179064750671387, "step": 7754 }, { "epoch": 0.89, "learning_rate": 3.2272035584689215e-08, "logits/chosen": -2.618408679962158, "logits/rejected": -2.82509446144104, "logps/chosen": -278.5928955078125, "logps/rejected": -273.00006103515625, "loss": 0.3632, "rewards/accuracies": 0.875, "rewards/chosen": -0.12050643563270569, "rewards/margins": 1.4834814071655273, "rewards/rejected": -1.6039879322052002, "step": 7755 }, { "epoch": 0.89, "learning_rate": 3.223691911506496e-08, "logits/chosen": -2.839402914047241, "logits/rejected": -3.096262216567993, "logps/chosen": -254.1485595703125, "logps/rejected": -217.7930450439453, "loss": 0.2863, "rewards/accuracies": 0.875, "rewards/chosen": 0.18049967288970947, "rewards/margins": 1.8717021942138672, "rewards/rejected": -1.6912025213241577, "step": 7756 }, { "epoch": 0.89, "learning_rate": 3.220180264544071e-08, "logits/chosen": -3.6699399948120117, "logits/rejected": -3.4313488006591797, "logps/chosen": -116.4141845703125, "logps/rejected": -128.45945739746094, "loss": 0.5258, "rewards/accuracies": 0.75, "rewards/chosen": -0.9416418671607971, "rewards/margins": 1.3592443466186523, "rewards/rejected": -2.3008861541748047, "step": 7757 }, { "epoch": 0.89, "learning_rate": 3.216668617581646e-08, "logits/chosen": -3.0435125827789307, "logits/rejected": -3.1494460105895996, "logps/chosen": -288.0492248535156, "logps/rejected": -336.267333984375, "loss": 0.3926, "rewards/accuracies": 0.75, "rewards/chosen": 0.17485564947128296, "rewards/margins": 2.082815170288086, "rewards/rejected": -1.9079595804214478, "step": 7758 }, { "epoch": 0.89, "learning_rate": 3.2131569706192204e-08, "logits/chosen": -2.776538848876953, "logits/rejected": -2.926887273788452, "logps/chosen": -497.0159606933594, "logps/rejected": -382.6397399902344, "loss": 0.854, "rewards/accuracies": 0.625, "rewards/chosen": 0.20331935584545135, "rewards/margins": 0.7104790210723877, "rewards/rejected": -0.5071597099304199, "step": 7759 }, { "epoch": 0.89, "learning_rate": 3.209645323656795e-08, "logits/chosen": -2.823354482650757, "logits/rejected": -2.928422451019287, "logps/chosen": -506.94903564453125, "logps/rejected": -423.1087341308594, "loss": 0.4125, "rewards/accuracies": 0.75, "rewards/chosen": -0.10480528324842453, "rewards/margins": 2.150928020477295, "rewards/rejected": -2.2557332515716553, "step": 7760 }, { "epoch": 0.89, "learning_rate": 3.206133676694369e-08, "logits/chosen": -3.31866717338562, "logits/rejected": -3.1086699962615967, "logps/chosen": -267.66571044921875, "logps/rejected": -221.0437774658203, "loss": 0.4744, "rewards/accuracies": 0.75, "rewards/chosen": 0.09812498092651367, "rewards/margins": 1.027414083480835, "rewards/rejected": -0.9292891025543213, "step": 7761 }, { "epoch": 0.89, "learning_rate": 3.202622029731944e-08, "logits/chosen": -2.9873719215393066, "logits/rejected": -3.056243658065796, "logps/chosen": -137.45883178710938, "logps/rejected": -155.4303741455078, "loss": 0.4329, "rewards/accuracies": 0.625, "rewards/chosen": -0.3647722601890564, "rewards/margins": 1.3844400644302368, "rewards/rejected": -1.749212384223938, "step": 7762 }, { "epoch": 0.89, "learning_rate": 3.1991103827695187e-08, "logits/chosen": -3.31479811668396, "logits/rejected": -3.318800210952759, "logps/chosen": -286.53875732421875, "logps/rejected": -258.2714538574219, "loss": 0.242, "rewards/accuracies": 1.0, "rewards/chosen": -0.14727306365966797, "rewards/margins": 2.5947091579437256, "rewards/rejected": -2.7419822216033936, "step": 7763 }, { "epoch": 0.9, "learning_rate": 3.1955987358070934e-08, "logits/chosen": -2.8956830501556396, "logits/rejected": -2.9655487537384033, "logps/chosen": -297.3709716796875, "logps/rejected": -264.487548828125, "loss": 0.2554, "rewards/accuracies": 1.0, "rewards/chosen": -0.3373333215713501, "rewards/margins": 1.7924246788024902, "rewards/rejected": -2.12975811958313, "step": 7764 }, { "epoch": 0.9, "learning_rate": 3.192087088844668e-08, "logits/chosen": -3.184483528137207, "logits/rejected": -3.110083818435669, "logps/chosen": -138.47952270507812, "logps/rejected": -251.5528564453125, "loss": 0.3499, "rewards/accuracies": 0.75, "rewards/chosen": -0.05075228959321976, "rewards/margins": 3.2091825008392334, "rewards/rejected": -3.259934902191162, "step": 7765 }, { "epoch": 0.9, "learning_rate": 3.188575441882243e-08, "logits/chosen": -3.733704090118408, "logits/rejected": -3.7118403911590576, "logps/chosen": -197.01849365234375, "logps/rejected": -183.10275268554688, "loss": 0.2847, "rewards/accuracies": 0.75, "rewards/chosen": 0.13157439231872559, "rewards/margins": 2.3602962493896484, "rewards/rejected": -2.228721857070923, "step": 7766 }, { "epoch": 0.9, "learning_rate": 3.1850637949198175e-08, "logits/chosen": -3.5294461250305176, "logits/rejected": -3.3068795204162598, "logps/chosen": -382.39642333984375, "logps/rejected": -225.06381225585938, "loss": 0.2532, "rewards/accuracies": 0.875, "rewards/chosen": 0.047955721616744995, "rewards/margins": 2.3628242015838623, "rewards/rejected": -2.314868450164795, "step": 7767 }, { "epoch": 0.9, "learning_rate": 3.1815521479573916e-08, "logits/chosen": -3.6067750453948975, "logits/rejected": -3.645845890045166, "logps/chosen": -220.69253540039062, "logps/rejected": -225.7537078857422, "loss": 0.6013, "rewards/accuracies": 0.625, "rewards/chosen": -0.4500094950199127, "rewards/margins": 0.6037793159484863, "rewards/rejected": -1.0537887811660767, "step": 7768 }, { "epoch": 0.9, "learning_rate": 3.1780405009949663e-08, "logits/chosen": -3.099208354949951, "logits/rejected": -3.1625473499298096, "logps/chosen": -222.03497314453125, "logps/rejected": -251.26455688476562, "loss": 0.5618, "rewards/accuracies": 0.5, "rewards/chosen": -0.4104800820350647, "rewards/margins": 0.5272287130355835, "rewards/rejected": -0.937708854675293, "step": 7769 }, { "epoch": 0.9, "learning_rate": 3.174528854032541e-08, "logits/chosen": -3.031942844390869, "logits/rejected": -3.3723647594451904, "logps/chosen": -210.31263732910156, "logps/rejected": -329.3812255859375, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -0.2958024740219116, "rewards/margins": 2.5137486457824707, "rewards/rejected": -2.809551239013672, "step": 7770 }, { "epoch": 0.9, "learning_rate": 3.171017207070116e-08, "logits/chosen": -2.6717443466186523, "logits/rejected": -3.0957465171813965, "logps/chosen": -201.66534423828125, "logps/rejected": -286.53790283203125, "loss": 0.2718, "rewards/accuracies": 0.875, "rewards/chosen": 0.007530152797698975, "rewards/margins": 3.7942147254943848, "rewards/rejected": -3.786684513092041, "step": 7771 }, { "epoch": 0.9, "learning_rate": 3.1675055601076905e-08, "logits/chosen": -2.70711088180542, "logits/rejected": -3.074040174484253, "logps/chosen": -158.69796752929688, "logps/rejected": -113.05538940429688, "loss": 0.5226, "rewards/accuracies": 0.625, "rewards/chosen": -0.21361547708511353, "rewards/margins": 0.684147834777832, "rewards/rejected": -0.8977632522583008, "step": 7772 }, { "epoch": 0.9, "learning_rate": 3.163993913145265e-08, "logits/chosen": -2.316713333129883, "logits/rejected": -2.244729518890381, "logps/chosen": -242.96124267578125, "logps/rejected": -278.79638671875, "loss": 0.4086, "rewards/accuracies": 0.875, "rewards/chosen": 0.16444630920886993, "rewards/margins": 1.1293755769729614, "rewards/rejected": -0.9649292230606079, "step": 7773 }, { "epoch": 0.9, "learning_rate": 3.16048226618284e-08, "logits/chosen": -2.748243570327759, "logits/rejected": -3.004669666290283, "logps/chosen": -289.43267822265625, "logps/rejected": -368.7463684082031, "loss": 0.2814, "rewards/accuracies": 0.875, "rewards/chosen": -0.21640148758888245, "rewards/margins": 2.043475389480591, "rewards/rejected": -2.2598769664764404, "step": 7774 }, { "epoch": 0.9, "learning_rate": 3.156970619220414e-08, "logits/chosen": -3.3452210426330566, "logits/rejected": -3.42846417427063, "logps/chosen": -235.58432006835938, "logps/rejected": -225.80458068847656, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": 0.0666767805814743, "rewards/margins": 1.915247917175293, "rewards/rejected": -1.8485711812973022, "step": 7775 }, { "epoch": 0.9, "learning_rate": 3.153458972257989e-08, "logits/chosen": -2.774590253829956, "logits/rejected": -2.874406576156616, "logps/chosen": -310.28759765625, "logps/rejected": -301.9608154296875, "loss": 0.1856, "rewards/accuracies": 1.0, "rewards/chosen": 0.15753909945487976, "rewards/margins": 3.1475067138671875, "rewards/rejected": -2.9899673461914062, "step": 7776 }, { "epoch": 0.9, "learning_rate": 3.1499473252955635e-08, "logits/chosen": -2.264498710632324, "logits/rejected": -2.3836610317230225, "logps/chosen": -284.7121887207031, "logps/rejected": -241.90484619140625, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": 0.25553351640701294, "rewards/margins": 1.3291255235671997, "rewards/rejected": -1.073591947555542, "step": 7777 }, { "epoch": 0.9, "learning_rate": 3.146435678333138e-08, "logits/chosen": -3.0895509719848633, "logits/rejected": -2.704430103302002, "logps/chosen": -266.28118896484375, "logps/rejected": -179.96063232421875, "loss": 0.3082, "rewards/accuracies": 0.75, "rewards/chosen": 0.055748552083969116, "rewards/margins": 1.9948947429656982, "rewards/rejected": -1.9391462802886963, "step": 7778 }, { "epoch": 0.9, "learning_rate": 3.142924031370713e-08, "logits/chosen": -2.9867701530456543, "logits/rejected": -2.99653959274292, "logps/chosen": -331.7909851074219, "logps/rejected": -199.41397094726562, "loss": 0.2601, "rewards/accuracies": 1.0, "rewards/chosen": 0.24757876992225647, "rewards/margins": 1.4736449718475342, "rewards/rejected": -1.2260662317276, "step": 7779 }, { "epoch": 0.9, "learning_rate": 3.1394123844082876e-08, "logits/chosen": -2.8842945098876953, "logits/rejected": -2.9293980598449707, "logps/chosen": -265.8250732421875, "logps/rejected": -167.9994659423828, "loss": 0.4786, "rewards/accuracies": 0.625, "rewards/chosen": -0.424845427274704, "rewards/margins": 1.3890703916549683, "rewards/rejected": -1.813915729522705, "step": 7780 }, { "epoch": 0.9, "learning_rate": 3.1359007374458624e-08, "logits/chosen": -2.7005465030670166, "logits/rejected": -2.6682260036468506, "logps/chosen": -413.13348388671875, "logps/rejected": -268.87249755859375, "loss": 0.5184, "rewards/accuracies": 0.75, "rewards/chosen": -0.10275600850582123, "rewards/margins": 1.3545186519622803, "rewards/rejected": -1.4572747945785522, "step": 7781 }, { "epoch": 0.9, "learning_rate": 3.1323890904834364e-08, "logits/chosen": -2.602792739868164, "logits/rejected": -2.6643996238708496, "logps/chosen": -287.07562255859375, "logps/rejected": -310.04608154296875, "loss": 0.2275, "rewards/accuracies": 1.0, "rewards/chosen": 0.008390024304389954, "rewards/margins": 2.3074727058410645, "rewards/rejected": -2.2990827560424805, "step": 7782 }, { "epoch": 0.9, "learning_rate": 3.128877443521011e-08, "logits/chosen": -3.18046236038208, "logits/rejected": -3.3441717624664307, "logps/chosen": -114.50439453125, "logps/rejected": -211.90164184570312, "loss": 0.2869, "rewards/accuracies": 1.0, "rewards/chosen": 0.0009965971112251282, "rewards/margins": 2.2106752395629883, "rewards/rejected": -2.2096786499023438, "step": 7783 }, { "epoch": 0.9, "learning_rate": 3.125365796558586e-08, "logits/chosen": -3.045872688293457, "logits/rejected": -2.9902167320251465, "logps/chosen": -254.2425537109375, "logps/rejected": -261.32513427734375, "loss": 0.3934, "rewards/accuracies": 0.75, "rewards/chosen": -0.08585759997367859, "rewards/margins": 1.0988686084747314, "rewards/rejected": -1.1847261190414429, "step": 7784 }, { "epoch": 0.9, "learning_rate": 3.1218541495961606e-08, "logits/chosen": -3.0430636405944824, "logits/rejected": -2.8303310871124268, "logps/chosen": -207.12075805664062, "logps/rejected": -246.49264526367188, "loss": 0.5089, "rewards/accuracies": 0.75, "rewards/chosen": -0.6795649528503418, "rewards/margins": 1.8767845630645752, "rewards/rejected": -2.556349277496338, "step": 7785 }, { "epoch": 0.9, "learning_rate": 3.118342502633735e-08, "logits/chosen": -2.804896831512451, "logits/rejected": -3.017408847808838, "logps/chosen": -204.60910034179688, "logps/rejected": -195.4331817626953, "loss": 0.3945, "rewards/accuracies": 0.75, "rewards/chosen": -0.5684860944747925, "rewards/margins": 1.8028619289398193, "rewards/rejected": -2.3713481426239014, "step": 7786 }, { "epoch": 0.9, "learning_rate": 3.11483085567131e-08, "logits/chosen": -3.0154473781585693, "logits/rejected": -3.374267578125, "logps/chosen": -396.72540283203125, "logps/rejected": -368.5937194824219, "loss": 0.5768, "rewards/accuracies": 0.625, "rewards/chosen": -0.546332597732544, "rewards/margins": 1.6459972858428955, "rewards/rejected": -2.1923298835754395, "step": 7787 }, { "epoch": 0.9, "learning_rate": 3.111319208708885e-08, "logits/chosen": -3.213884115219116, "logits/rejected": -3.232037305831909, "logps/chosen": -190.18780517578125, "logps/rejected": -119.09656524658203, "loss": 0.5027, "rewards/accuracies": 0.75, "rewards/chosen": -0.19126199185848236, "rewards/margins": 1.449885368347168, "rewards/rejected": -1.6411473751068115, "step": 7788 }, { "epoch": 0.9, "learning_rate": 3.107807561746459e-08, "logits/chosen": -2.7316083908081055, "logits/rejected": -2.5449788570404053, "logps/chosen": -251.4188232421875, "logps/rejected": -233.43865966796875, "loss": 0.1883, "rewards/accuracies": 1.0, "rewards/chosen": 0.3227115571498871, "rewards/margins": 2.0322113037109375, "rewards/rejected": -1.7094998359680176, "step": 7789 }, { "epoch": 0.9, "learning_rate": 3.1042959147840335e-08, "logits/chosen": -3.3227922916412354, "logits/rejected": -2.8386964797973633, "logps/chosen": -254.95440673828125, "logps/rejected": -259.3973388671875, "loss": 0.3221, "rewards/accuracies": 0.75, "rewards/chosen": -0.3963538110256195, "rewards/margins": 1.5987297296524048, "rewards/rejected": -1.9950834512710571, "step": 7790 }, { "epoch": 0.9, "learning_rate": 3.100784267821608e-08, "logits/chosen": -2.4748153686523438, "logits/rejected": -2.434777021408081, "logps/chosen": -263.6317138671875, "logps/rejected": -151.89529418945312, "loss": 0.3675, "rewards/accuracies": 0.625, "rewards/chosen": -0.1286814659833908, "rewards/margins": 1.5517648458480835, "rewards/rejected": -1.6804462671279907, "step": 7791 }, { "epoch": 0.9, "learning_rate": 3.0972726208591823e-08, "logits/chosen": -2.6453375816345215, "logits/rejected": -2.490499496459961, "logps/chosen": -162.45098876953125, "logps/rejected": -252.7466583251953, "loss": 0.4845, "rewards/accuracies": 0.75, "rewards/chosen": -0.3012300431728363, "rewards/margins": 1.5879521369934082, "rewards/rejected": -1.8891820907592773, "step": 7792 }, { "epoch": 0.9, "learning_rate": 3.093760973896757e-08, "logits/chosen": -2.5269856452941895, "logits/rejected": -2.5486247539520264, "logps/chosen": -288.6363830566406, "logps/rejected": -170.61572265625, "loss": 0.4178, "rewards/accuracies": 1.0, "rewards/chosen": -0.30026501417160034, "rewards/margins": 0.8973388075828552, "rewards/rejected": -1.1976038217544556, "step": 7793 }, { "epoch": 0.9, "learning_rate": 3.090249326934332e-08, "logits/chosen": -3.04618501663208, "logits/rejected": -3.4729700088500977, "logps/chosen": -253.50790405273438, "logps/rejected": -146.75161743164062, "loss": 0.587, "rewards/accuracies": 0.5, "rewards/chosen": -0.29081982374191284, "rewards/margins": 0.5287672281265259, "rewards/rejected": -0.8195871114730835, "step": 7794 }, { "epoch": 0.9, "learning_rate": 3.0867376799719065e-08, "logits/chosen": -2.771589756011963, "logits/rejected": -2.701406955718994, "logps/chosen": -207.8033447265625, "logps/rejected": -298.49871826171875, "loss": 0.4703, "rewards/accuracies": 0.875, "rewards/chosen": -0.8988261222839355, "rewards/margins": 2.344109058380127, "rewards/rejected": -3.2429351806640625, "step": 7795 }, { "epoch": 0.9, "learning_rate": 3.083226033009481e-08, "logits/chosen": -3.302011251449585, "logits/rejected": -2.896148204803467, "logps/chosen": -408.2320861816406, "logps/rejected": -246.6964874267578, "loss": 0.4095, "rewards/accuracies": 0.625, "rewards/chosen": -0.22144433856010437, "rewards/margins": 1.5304447412490845, "rewards/rejected": -1.7518889904022217, "step": 7796 }, { "epoch": 0.9, "learning_rate": 3.079714386047056e-08, "logits/chosen": -3.557800769805908, "logits/rejected": -3.7130162715911865, "logps/chosen": -154.16366577148438, "logps/rejected": -218.97866821289062, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": 0.33592191338539124, "rewards/margins": 2.598933219909668, "rewards/rejected": -2.2630114555358887, "step": 7797 }, { "epoch": 0.9, "learning_rate": 3.076202739084631e-08, "logits/chosen": -3.2626242637634277, "logits/rejected": -3.311753511428833, "logps/chosen": -118.90097045898438, "logps/rejected": -212.41732788085938, "loss": 0.3115, "rewards/accuracies": 0.875, "rewards/chosen": -0.38824307918548584, "rewards/margins": 1.7958723306655884, "rewards/rejected": -2.184115171432495, "step": 7798 }, { "epoch": 0.9, "learning_rate": 3.072691092122205e-08, "logits/chosen": -3.6691017150878906, "logits/rejected": -3.487724781036377, "logps/chosen": -201.18531799316406, "logps/rejected": -172.29026794433594, "loss": 0.8268, "rewards/accuracies": 0.625, "rewards/chosen": -0.3854404389858246, "rewards/margins": 0.47551342844963074, "rewards/rejected": -0.8609538078308105, "step": 7799 }, { "epoch": 0.9, "learning_rate": 3.0691794451597795e-08, "logits/chosen": -3.098599910736084, "logits/rejected": -3.1018755435943604, "logps/chosen": -225.54949951171875, "logps/rejected": -370.26690673828125, "loss": 0.2393, "rewards/accuracies": 0.875, "rewards/chosen": 0.1196582019329071, "rewards/margins": 2.7772297859191895, "rewards/rejected": -2.65757155418396, "step": 7800 }, { "epoch": 0.9, "learning_rate": 3.065667798197354e-08, "logits/chosen": -2.8333261013031006, "logits/rejected": -3.092324733734131, "logps/chosen": -140.63760375976562, "logps/rejected": -196.13363647460938, "loss": 0.1865, "rewards/accuracies": 1.0, "rewards/chosen": -0.09240604192018509, "rewards/margins": 2.437268018722534, "rewards/rejected": -2.5296740531921387, "step": 7801 }, { "epoch": 0.9, "learning_rate": 3.062156151234929e-08, "logits/chosen": -2.8085546493530273, "logits/rejected": -2.9746334552764893, "logps/chosen": -182.75848388671875, "logps/rejected": -182.48367309570312, "loss": 0.513, "rewards/accuracies": 0.75, "rewards/chosen": -0.4765058159828186, "rewards/margins": 1.1118367910385132, "rewards/rejected": -1.5883426666259766, "step": 7802 }, { "epoch": 0.9, "learning_rate": 3.0586445042725036e-08, "logits/chosen": -3.2692179679870605, "logits/rejected": -3.3894081115722656, "logps/chosen": -284.3343200683594, "logps/rejected": -218.2522735595703, "loss": 0.4335, "rewards/accuracies": 0.75, "rewards/chosen": -0.1991048902273178, "rewards/margins": 1.4168310165405273, "rewards/rejected": -1.615936040878296, "step": 7803 }, { "epoch": 0.9, "learning_rate": 3.0551328573100784e-08, "logits/chosen": -3.9299569129943848, "logits/rejected": -3.853154182434082, "logps/chosen": -332.6553649902344, "logps/rejected": -303.6148986816406, "loss": 0.5125, "rewards/accuracies": 0.625, "rewards/chosen": -0.08901393413543701, "rewards/margins": 1.2583497762680054, "rewards/rejected": -1.3473635911941528, "step": 7804 }, { "epoch": 0.9, "learning_rate": 3.051621210347653e-08, "logits/chosen": -3.4354915618896484, "logits/rejected": -3.329554557800293, "logps/chosen": -179.05979919433594, "logps/rejected": -210.20828247070312, "loss": 0.3799, "rewards/accuracies": 0.875, "rewards/chosen": 0.6075061559677124, "rewards/margins": 1.6012015342712402, "rewards/rejected": -0.9936953783035278, "step": 7805 }, { "epoch": 0.9, "learning_rate": 3.048109563385227e-08, "logits/chosen": -3.693774938583374, "logits/rejected": -3.6042940616607666, "logps/chosen": -311.261474609375, "logps/rejected": -208.22079467773438, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": 0.05439196527004242, "rewards/margins": 2.8182196617126465, "rewards/rejected": -2.763827323913574, "step": 7806 }, { "epoch": 0.9, "learning_rate": 3.044597916422802e-08, "logits/chosen": -3.107422351837158, "logits/rejected": -3.048698663711548, "logps/chosen": -245.4534149169922, "logps/rejected": -251.77638244628906, "loss": 0.6014, "rewards/accuracies": 0.625, "rewards/chosen": -0.15875597298145294, "rewards/margins": 0.4813253879547119, "rewards/rejected": -0.6400814652442932, "step": 7807 }, { "epoch": 0.9, "learning_rate": 3.0410862694603766e-08, "logits/chosen": -2.9945740699768066, "logits/rejected": -2.993337869644165, "logps/chosen": -168.14230346679688, "logps/rejected": -160.8536376953125, "loss": 0.7263, "rewards/accuracies": 0.625, "rewards/chosen": -0.05640730261802673, "rewards/margins": 0.7278283834457397, "rewards/rejected": -0.7842357158660889, "step": 7808 }, { "epoch": 0.9, "learning_rate": 3.037574622497951e-08, "logits/chosen": -3.0626254081726074, "logits/rejected": -3.1552796363830566, "logps/chosen": -439.6769104003906, "logps/rejected": -254.32872009277344, "loss": 0.2691, "rewards/accuracies": 0.875, "rewards/chosen": -0.498441219329834, "rewards/margins": 2.452948570251465, "rewards/rejected": -2.951389789581299, "step": 7809 }, { "epoch": 0.9, "learning_rate": 3.034062975535526e-08, "logits/chosen": -3.800781726837158, "logits/rejected": -3.650975227355957, "logps/chosen": -346.2925109863281, "logps/rejected": -284.1558532714844, "loss": 0.2709, "rewards/accuracies": 0.875, "rewards/chosen": -0.13335317373275757, "rewards/margins": 3.2688796520233154, "rewards/rejected": -3.4022326469421387, "step": 7810 }, { "epoch": 0.9, "learning_rate": 3.030551328573101e-08, "logits/chosen": -3.189026117324829, "logits/rejected": -3.3325459957122803, "logps/chosen": -232.50543212890625, "logps/rejected": -255.28515625, "loss": 0.6001, "rewards/accuracies": 0.625, "rewards/chosen": -0.5139163136482239, "rewards/margins": 0.289531409740448, "rewards/rejected": -0.8034477233886719, "step": 7811 }, { "epoch": 0.9, "learning_rate": 3.0270396816106755e-08, "logits/chosen": -3.1647489070892334, "logits/rejected": -3.0846970081329346, "logps/chosen": -350.3502502441406, "logps/rejected": -295.945556640625, "loss": 0.2083, "rewards/accuracies": 1.0, "rewards/chosen": 0.44555580615997314, "rewards/margins": 2.2534523010253906, "rewards/rejected": -1.807896614074707, "step": 7812 }, { "epoch": 0.9, "learning_rate": 3.0235280346482495e-08, "logits/chosen": -3.0270113945007324, "logits/rejected": -2.7543015480041504, "logps/chosen": -226.32632446289062, "logps/rejected": -251.8033447265625, "loss": 0.2044, "rewards/accuracies": 0.875, "rewards/chosen": 0.271738737821579, "rewards/margins": 2.424967050552368, "rewards/rejected": -2.153228282928467, "step": 7813 }, { "epoch": 0.9, "learning_rate": 3.020016387685824e-08, "logits/chosen": -2.6890745162963867, "logits/rejected": -2.9330434799194336, "logps/chosen": -343.00531005859375, "logps/rejected": -320.06005859375, "loss": 0.3648, "rewards/accuracies": 0.875, "rewards/chosen": -0.07912154495716095, "rewards/margins": 1.6670434474945068, "rewards/rejected": -1.7461650371551514, "step": 7814 }, { "epoch": 0.9, "learning_rate": 3.016504740723399e-08, "logits/chosen": -3.3053715229034424, "logits/rejected": -3.0547518730163574, "logps/chosen": -320.659912109375, "logps/rejected": -359.66436767578125, "loss": 0.3261, "rewards/accuracies": 0.875, "rewards/chosen": -0.7376658320426941, "rewards/margins": 2.6159353256225586, "rewards/rejected": -3.3536014556884766, "step": 7815 }, { "epoch": 0.9, "learning_rate": 3.012993093760974e-08, "logits/chosen": -3.113060712814331, "logits/rejected": -3.3458147048950195, "logps/chosen": -295.1081848144531, "logps/rejected": -203.1611328125, "loss": 0.3126, "rewards/accuracies": 0.875, "rewards/chosen": 0.23938781023025513, "rewards/margins": 2.1724114418029785, "rewards/rejected": -1.9330233335494995, "step": 7816 }, { "epoch": 0.9, "learning_rate": 3.0094814467985484e-08, "logits/chosen": -2.7694664001464844, "logits/rejected": -2.5070791244506836, "logps/chosen": -281.05206298828125, "logps/rejected": -274.0117492675781, "loss": 0.333, "rewards/accuracies": 0.875, "rewards/chosen": -0.05018618330359459, "rewards/margins": 1.284712791442871, "rewards/rejected": -1.3348989486694336, "step": 7817 }, { "epoch": 0.9, "learning_rate": 3.005969799836123e-08, "logits/chosen": -3.1946730613708496, "logits/rejected": -3.1040821075439453, "logps/chosen": -207.4013671875, "logps/rejected": -353.5582275390625, "loss": 0.594, "rewards/accuracies": 0.625, "rewards/chosen": -0.6908109188079834, "rewards/margins": 2.6344292163848877, "rewards/rejected": -3.325240135192871, "step": 7818 }, { "epoch": 0.9, "learning_rate": 3.002458152873698e-08, "logits/chosen": -3.066459894180298, "logits/rejected": -2.616295099258423, "logps/chosen": -293.3780822753906, "logps/rejected": -181.04324340820312, "loss": 0.3147, "rewards/accuracies": 1.0, "rewards/chosen": -0.3053177297115326, "rewards/margins": 1.344879150390625, "rewards/rejected": -1.6501967906951904, "step": 7819 }, { "epoch": 0.9, "learning_rate": 2.9989465059112726e-08, "logits/chosen": -3.898989677429199, "logits/rejected": -3.645484447479248, "logps/chosen": -162.3035888671875, "logps/rejected": -274.24371337890625, "loss": 0.4812, "rewards/accuracies": 0.75, "rewards/chosen": -0.5253479480743408, "rewards/margins": 2.02262020111084, "rewards/rejected": -2.5479681491851807, "step": 7820 }, { "epoch": 0.9, "learning_rate": 2.995434858948847e-08, "logits/chosen": -3.4251952171325684, "logits/rejected": -2.895747661590576, "logps/chosen": -358.1121520996094, "logps/rejected": -261.7951965332031, "loss": 0.9835, "rewards/accuracies": 0.625, "rewards/chosen": -1.378661036491394, "rewards/margins": 0.4363572597503662, "rewards/rejected": -1.8150184154510498, "step": 7821 }, { "epoch": 0.9, "learning_rate": 2.9919232119864214e-08, "logits/chosen": -2.9101791381835938, "logits/rejected": -2.8709945678710938, "logps/chosen": -301.5269775390625, "logps/rejected": -333.7005310058594, "loss": 0.3811, "rewards/accuracies": 0.75, "rewards/chosen": 0.5649310350418091, "rewards/margins": 1.8188796043395996, "rewards/rejected": -1.253948450088501, "step": 7822 }, { "epoch": 0.9, "learning_rate": 2.988411565023996e-08, "logits/chosen": -3.5448057651519775, "logits/rejected": -3.659106731414795, "logps/chosen": -362.90533447265625, "logps/rejected": -356.2138671875, "loss": 0.4378, "rewards/accuracies": 0.75, "rewards/chosen": -0.27571913599967957, "rewards/margins": 1.5644207000732422, "rewards/rejected": -1.8401397466659546, "step": 7823 }, { "epoch": 0.9, "learning_rate": 2.984899918061571e-08, "logits/chosen": -3.238248825073242, "logits/rejected": -3.2354540824890137, "logps/chosen": -375.36669921875, "logps/rejected": -256.2217712402344, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": 0.18133734166622162, "rewards/margins": 2.0289831161499023, "rewards/rejected": -1.847645878791809, "step": 7824 }, { "epoch": 0.9, "learning_rate": 2.9813882710991456e-08, "logits/chosen": -3.1292054653167725, "logits/rejected": -3.1935925483703613, "logps/chosen": -115.11970520019531, "logps/rejected": -224.84814453125, "loss": 0.4173, "rewards/accuracies": 0.75, "rewards/chosen": 0.19043001532554626, "rewards/margins": 1.879460096359253, "rewards/rejected": -1.6890300512313843, "step": 7825 }, { "epoch": 0.9, "learning_rate": 2.97787662413672e-08, "logits/chosen": -3.032172918319702, "logits/rejected": -3.0575919151306152, "logps/chosen": -370.8744201660156, "logps/rejected": -321.0774230957031, "loss": 0.1891, "rewards/accuracies": 0.875, "rewards/chosen": -0.3495209813117981, "rewards/margins": 2.4869134426116943, "rewards/rejected": -2.8364343643188477, "step": 7826 }, { "epoch": 0.9, "learning_rate": 2.9743649771742947e-08, "logits/chosen": -2.802924871444702, "logits/rejected": -2.7647507190704346, "logps/chosen": -262.8232421875, "logps/rejected": -316.03314208984375, "loss": 0.3145, "rewards/accuracies": 0.875, "rewards/chosen": -0.36372318863868713, "rewards/margins": 1.626491665840149, "rewards/rejected": -1.9902148246765137, "step": 7827 }, { "epoch": 0.9, "learning_rate": 2.9708533302118694e-08, "logits/chosen": -2.503469228744507, "logits/rejected": -2.802560329437256, "logps/chosen": -420.9840393066406, "logps/rejected": -272.2008972167969, "loss": 0.3954, "rewards/accuracies": 0.75, "rewards/chosen": -0.5190492868423462, "rewards/margins": 1.7978394031524658, "rewards/rejected": -2.3168883323669434, "step": 7828 }, { "epoch": 0.9, "learning_rate": 2.967341683249444e-08, "logits/chosen": -2.409254789352417, "logits/rejected": -2.4816765785217285, "logps/chosen": -399.53436279296875, "logps/rejected": -298.1506652832031, "loss": 0.2963, "rewards/accuracies": 0.875, "rewards/chosen": 0.33370909094810486, "rewards/margins": 1.6671545505523682, "rewards/rejected": -1.3334453105926514, "step": 7829 }, { "epoch": 0.9, "learning_rate": 2.9638300362870185e-08, "logits/chosen": -2.5328409671783447, "logits/rejected": -2.4208667278289795, "logps/chosen": -388.964111328125, "logps/rejected": -290.7350769042969, "loss": 0.5049, "rewards/accuracies": 0.625, "rewards/chosen": -0.2631969153881073, "rewards/margins": 1.0116833448410034, "rewards/rejected": -1.274880290031433, "step": 7830 }, { "epoch": 0.9, "learning_rate": 2.9603183893245932e-08, "logits/chosen": -3.321373701095581, "logits/rejected": -3.497218608856201, "logps/chosen": -222.80738830566406, "logps/rejected": -335.01605224609375, "loss": 0.3757, "rewards/accuracies": 0.875, "rewards/chosen": -0.2063494473695755, "rewards/margins": 2.531726837158203, "rewards/rejected": -2.7380762100219727, "step": 7831 }, { "epoch": 0.9, "learning_rate": 2.956806742362168e-08, "logits/chosen": -3.524317741394043, "logits/rejected": -3.445833683013916, "logps/chosen": -574.4561157226562, "logps/rejected": -350.5565185546875, "loss": 0.4258, "rewards/accuracies": 0.75, "rewards/chosen": -0.2087143510580063, "rewards/margins": 1.3181647062301636, "rewards/rejected": -1.526879072189331, "step": 7832 }, { "epoch": 0.9, "learning_rate": 2.9532950953997424e-08, "logits/chosen": -2.504621982574463, "logits/rejected": -2.484931707382202, "logps/chosen": -425.83721923828125, "logps/rejected": -292.1983947753906, "loss": 0.3904, "rewards/accuracies": 0.625, "rewards/chosen": 0.05825109779834747, "rewards/margins": 1.7424185276031494, "rewards/rejected": -1.6841673851013184, "step": 7833 }, { "epoch": 0.9, "learning_rate": 2.949783448437317e-08, "logits/chosen": -3.0492172241210938, "logits/rejected": -3.0419390201568604, "logps/chosen": -478.5390625, "logps/rejected": -313.32073974609375, "loss": 0.4581, "rewards/accuracies": 0.75, "rewards/chosen": -0.33983314037323, "rewards/margins": 1.7003750801086426, "rewards/rejected": -2.040208339691162, "step": 7834 }, { "epoch": 0.9, "learning_rate": 2.9462718014748918e-08, "logits/chosen": -3.1191282272338867, "logits/rejected": -3.164759635925293, "logps/chosen": -168.68727111816406, "logps/rejected": -170.16348266601562, "loss": 0.3316, "rewards/accuracies": 0.75, "rewards/chosen": 0.3404485583305359, "rewards/margins": 1.8260252475738525, "rewards/rejected": -1.4855767488479614, "step": 7835 }, { "epoch": 0.9, "learning_rate": 2.9427601545124665e-08, "logits/chosen": -3.1438021659851074, "logits/rejected": -2.7998111248016357, "logps/chosen": -303.9043884277344, "logps/rejected": -259.82720947265625, "loss": 0.3347, "rewards/accuracies": 0.875, "rewards/chosen": -0.33572208881378174, "rewards/margins": 1.7771189212799072, "rewards/rejected": -2.1128411293029785, "step": 7836 }, { "epoch": 0.9, "learning_rate": 2.939248507550041e-08, "logits/chosen": -4.099872589111328, "logits/rejected": -3.6996560096740723, "logps/chosen": -359.8845520019531, "logps/rejected": -254.70347595214844, "loss": 0.161, "rewards/accuracies": 1.0, "rewards/chosen": 0.11736652255058289, "rewards/margins": 1.9536365270614624, "rewards/rejected": -1.8362699747085571, "step": 7837 }, { "epoch": 0.9, "learning_rate": 2.9357368605876157e-08, "logits/chosen": -3.941861629486084, "logits/rejected": -3.7202041149139404, "logps/chosen": -256.68109130859375, "logps/rejected": -159.70281982421875, "loss": 0.5399, "rewards/accuracies": 0.75, "rewards/chosen": 0.0575445182621479, "rewards/margins": 1.7847795486450195, "rewards/rejected": -1.7272350788116455, "step": 7838 }, { "epoch": 0.9, "learning_rate": 2.9322252136251897e-08, "logits/chosen": -3.0995984077453613, "logits/rejected": -3.135566234588623, "logps/chosen": -232.6947021484375, "logps/rejected": -255.1054229736328, "loss": 0.6986, "rewards/accuracies": 0.625, "rewards/chosen": -0.5742395520210266, "rewards/margins": 2.5648908615112305, "rewards/rejected": -3.1391305923461914, "step": 7839 }, { "epoch": 0.9, "learning_rate": 2.9287135666627644e-08, "logits/chosen": -3.3552074432373047, "logits/rejected": -3.2976255416870117, "logps/chosen": -170.05648803710938, "logps/rejected": -216.1123504638672, "loss": 0.4424, "rewards/accuracies": 0.75, "rewards/chosen": -0.27471908926963806, "rewards/margins": 2.084280490875244, "rewards/rejected": -2.358999729156494, "step": 7840 }, { "epoch": 0.9, "learning_rate": 2.925201919700339e-08, "logits/chosen": -2.9034247398376465, "logits/rejected": -2.7599687576293945, "logps/chosen": -557.7071533203125, "logps/rejected": -326.9872741699219, "loss": 1.4051, "rewards/accuracies": 0.375, "rewards/chosen": -0.8963369131088257, "rewards/margins": 0.130376935005188, "rewards/rejected": -1.0267138481140137, "step": 7841 }, { "epoch": 0.9, "learning_rate": 2.921690272737914e-08, "logits/chosen": -2.371365547180176, "logits/rejected": -2.701007604598999, "logps/chosen": -327.18231201171875, "logps/rejected": -284.3662414550781, "loss": 0.5018, "rewards/accuracies": 0.75, "rewards/chosen": 0.035202257335186005, "rewards/margins": 0.7481095194816589, "rewards/rejected": -0.7129073143005371, "step": 7842 }, { "epoch": 0.9, "learning_rate": 2.9181786257754883e-08, "logits/chosen": -3.454371452331543, "logits/rejected": -3.445422887802124, "logps/chosen": -152.30752563476562, "logps/rejected": -257.1273498535156, "loss": 0.3576, "rewards/accuracies": 1.0, "rewards/chosen": -0.3617950677871704, "rewards/margins": 1.118908405303955, "rewards/rejected": -1.4807034730911255, "step": 7843 }, { "epoch": 0.9, "learning_rate": 2.914666978813063e-08, "logits/chosen": -2.805938720703125, "logits/rejected": -2.7990875244140625, "logps/chosen": -168.53256225585938, "logps/rejected": -186.8749542236328, "loss": 0.2783, "rewards/accuracies": 0.875, "rewards/chosen": 0.052128519862890244, "rewards/margins": 1.8649131059646606, "rewards/rejected": -1.8127846717834473, "step": 7844 }, { "epoch": 0.9, "learning_rate": 2.9111553318506377e-08, "logits/chosen": -3.9102749824523926, "logits/rejected": -4.012794017791748, "logps/chosen": -261.440185546875, "logps/rejected": -223.09341430664062, "loss": 0.2039, "rewards/accuracies": 1.0, "rewards/chosen": 0.36316484212875366, "rewards/margins": 1.8029118776321411, "rewards/rejected": -1.4397470951080322, "step": 7845 }, { "epoch": 0.9, "learning_rate": 2.9076436848882125e-08, "logits/chosen": -3.4473016262054443, "logits/rejected": -3.1729891300201416, "logps/chosen": -285.378662109375, "logps/rejected": -275.57110595703125, "loss": 0.2634, "rewards/accuracies": 0.875, "rewards/chosen": -0.5412144660949707, "rewards/margins": 2.2048141956329346, "rewards/rejected": -2.7460289001464844, "step": 7846 }, { "epoch": 0.9, "learning_rate": 2.904132037925787e-08, "logits/chosen": -2.8767664432525635, "logits/rejected": -3.064741373062134, "logps/chosen": -348.1396179199219, "logps/rejected": -257.42633056640625, "loss": 0.2331, "rewards/accuracies": 0.875, "rewards/chosen": 0.1464529037475586, "rewards/margins": 2.8991146087646484, "rewards/rejected": -2.752661943435669, "step": 7847 }, { "epoch": 0.9, "learning_rate": 2.9006203909633616e-08, "logits/chosen": -2.8237881660461426, "logits/rejected": -2.948032855987549, "logps/chosen": -182.38177490234375, "logps/rejected": -325.1276550292969, "loss": 0.3576, "rewards/accuracies": 0.75, "rewards/chosen": 0.18169422447681427, "rewards/margins": 1.8261866569519043, "rewards/rejected": -1.6444923877716064, "step": 7848 }, { "epoch": 0.9, "learning_rate": 2.8971087440009363e-08, "logits/chosen": -3.353363275527954, "logits/rejected": -3.3154592514038086, "logps/chosen": -322.64532470703125, "logps/rejected": -378.2384033203125, "loss": 0.2253, "rewards/accuracies": 1.0, "rewards/chosen": -0.004413112998008728, "rewards/margins": 1.684146523475647, "rewards/rejected": -1.688559651374817, "step": 7849 }, { "epoch": 0.9, "learning_rate": 2.8935970970385107e-08, "logits/chosen": -2.93969464302063, "logits/rejected": -3.0608925819396973, "logps/chosen": -256.9266357421875, "logps/rejected": -186.99636840820312, "loss": 0.3266, "rewards/accuracies": 0.75, "rewards/chosen": 0.551852822303772, "rewards/margins": 2.3994431495666504, "rewards/rejected": -1.8475900888442993, "step": 7850 }, { "epoch": 0.91, "learning_rate": 2.8900854500760854e-08, "logits/chosen": -3.2338733673095703, "logits/rejected": -3.0821585655212402, "logps/chosen": -298.9503173828125, "logps/rejected": -307.85833740234375, "loss": 0.3268, "rewards/accuracies": 0.875, "rewards/chosen": 0.2982020080089569, "rewards/margins": 1.93660569190979, "rewards/rejected": -1.6384036540985107, "step": 7851 }, { "epoch": 0.91, "learning_rate": 2.88657380311366e-08, "logits/chosen": -2.7936437129974365, "logits/rejected": -3.171645164489746, "logps/chosen": -361.306640625, "logps/rejected": -232.93421936035156, "loss": 0.7276, "rewards/accuracies": 0.375, "rewards/chosen": -0.6817001104354858, "rewards/margins": 1.026941180229187, "rewards/rejected": -1.7086412906646729, "step": 7852 }, { "epoch": 0.91, "learning_rate": 2.883062156151235e-08, "logits/chosen": -3.438666343688965, "logits/rejected": -3.2109484672546387, "logps/chosen": -319.41796875, "logps/rejected": -420.9537658691406, "loss": 0.446, "rewards/accuracies": 0.75, "rewards/chosen": -0.11063627153635025, "rewards/margins": 1.7314003705978394, "rewards/rejected": -1.842036485671997, "step": 7853 }, { "epoch": 0.91, "learning_rate": 2.8795505091888092e-08, "logits/chosen": -3.3926024436950684, "logits/rejected": -3.1557838916778564, "logps/chosen": -253.06130981445312, "logps/rejected": -201.3717803955078, "loss": 0.4079, "rewards/accuracies": 0.875, "rewards/chosen": -0.14810988306999207, "rewards/margins": 1.2894110679626465, "rewards/rejected": -1.437520980834961, "step": 7854 }, { "epoch": 0.91, "learning_rate": 2.876038862226384e-08, "logits/chosen": -3.380932331085205, "logits/rejected": -3.9840683937072754, "logps/chosen": -91.88964080810547, "logps/rejected": -224.34095764160156, "loss": 0.1713, "rewards/accuracies": 0.875, "rewards/chosen": 0.2748192846775055, "rewards/margins": 3.480587959289551, "rewards/rejected": -3.2057688236236572, "step": 7855 }, { "epoch": 0.91, "learning_rate": 2.8725272152639587e-08, "logits/chosen": -2.2590932846069336, "logits/rejected": -2.3274283409118652, "logps/chosen": -303.5101318359375, "logps/rejected": -297.15191650390625, "loss": 0.391, "rewards/accuracies": 0.875, "rewards/chosen": -0.4728449881076813, "rewards/margins": 1.832222819328308, "rewards/rejected": -2.305067777633667, "step": 7856 }, { "epoch": 0.91, "learning_rate": 2.869015568301533e-08, "logits/chosen": -2.801412582397461, "logits/rejected": -2.5191047191619873, "logps/chosen": -203.75830078125, "logps/rejected": -176.73727416992188, "loss": 0.2634, "rewards/accuracies": 1.0, "rewards/chosen": -0.04368014633655548, "rewards/margins": 1.4589707851409912, "rewards/rejected": -1.5026509761810303, "step": 7857 }, { "epoch": 0.91, "learning_rate": 2.8655039213391078e-08, "logits/chosen": -2.69659423828125, "logits/rejected": -3.0463767051696777, "logps/chosen": -192.69546508789062, "logps/rejected": -172.5802459716797, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": -0.6653135418891907, "rewards/margins": 1.454699993133545, "rewards/rejected": -2.120013475418091, "step": 7858 }, { "epoch": 0.91, "learning_rate": 2.8619922743766825e-08, "logits/chosen": -2.970149040222168, "logits/rejected": -2.948598861694336, "logps/chosen": -199.190185546875, "logps/rejected": -179.20370483398438, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": 0.21554023027420044, "rewards/margins": 2.0987606048583984, "rewards/rejected": -1.8832203149795532, "step": 7859 }, { "epoch": 0.91, "learning_rate": 2.8584806274142573e-08, "logits/chosen": -2.7225728034973145, "logits/rejected": -2.8144214153289795, "logps/chosen": -216.82403564453125, "logps/rejected": -301.126708984375, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": -0.6521344184875488, "rewards/margins": 1.967595100402832, "rewards/rejected": -2.6197292804718018, "step": 7860 }, { "epoch": 0.91, "learning_rate": 2.8549689804518317e-08, "logits/chosen": -3.2249643802642822, "logits/rejected": -3.240175485610962, "logps/chosen": -266.2273254394531, "logps/rejected": -271.55987548828125, "loss": 0.3854, "rewards/accuracies": 0.75, "rewards/chosen": -0.24593773484230042, "rewards/margins": 1.7364988327026367, "rewards/rejected": -1.9824365377426147, "step": 7861 }, { "epoch": 0.91, "learning_rate": 2.8514573334894064e-08, "logits/chosen": -3.0681686401367188, "logits/rejected": -3.340385675430298, "logps/chosen": -89.01580047607422, "logps/rejected": -234.29681396484375, "loss": 0.3287, "rewards/accuracies": 0.875, "rewards/chosen": -0.5759316086769104, "rewards/margins": 2.196342706680298, "rewards/rejected": -2.7722742557525635, "step": 7862 }, { "epoch": 0.91, "learning_rate": 2.847945686526981e-08, "logits/chosen": -2.6855578422546387, "logits/rejected": -2.8005542755126953, "logps/chosen": -171.07489013671875, "logps/rejected": -170.78277587890625, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -0.5896208882331848, "rewards/margins": 1.089625597000122, "rewards/rejected": -1.6792463064193726, "step": 7863 }, { "epoch": 0.91, "learning_rate": 2.8444340395645558e-08, "logits/chosen": -3.261836528778076, "logits/rejected": -3.0541462898254395, "logps/chosen": -275.8838195800781, "logps/rejected": -213.26828002929688, "loss": 0.3057, "rewards/accuracies": 0.875, "rewards/chosen": -0.11604300141334534, "rewards/margins": 2.511101722717285, "rewards/rejected": -2.6271448135375977, "step": 7864 }, { "epoch": 0.91, "learning_rate": 2.8409223926021302e-08, "logits/chosen": -2.9331276416778564, "logits/rejected": -3.182896852493286, "logps/chosen": -312.85699462890625, "logps/rejected": -384.49169921875, "loss": 0.3113, "rewards/accuracies": 0.75, "rewards/chosen": -0.664020299911499, "rewards/margins": 2.1916065216064453, "rewards/rejected": -2.8556270599365234, "step": 7865 }, { "epoch": 0.91, "learning_rate": 2.837410745639705e-08, "logits/chosen": -3.4103686809539795, "logits/rejected": -3.410764455795288, "logps/chosen": -231.1832275390625, "logps/rejected": -271.01727294921875, "loss": 0.2596, "rewards/accuracies": 0.875, "rewards/chosen": 0.2503363788127899, "rewards/margins": 2.4205029010772705, "rewards/rejected": -2.170166492462158, "step": 7866 }, { "epoch": 0.91, "learning_rate": 2.8338990986772797e-08, "logits/chosen": -2.976512908935547, "logits/rejected": -3.441013813018799, "logps/chosen": -282.9111328125, "logps/rejected": -303.06060791015625, "loss": 0.5314, "rewards/accuracies": 0.625, "rewards/chosen": -0.15608254075050354, "rewards/margins": 1.527730107307434, "rewards/rejected": -1.6838124990463257, "step": 7867 }, { "epoch": 0.91, "learning_rate": 2.830387451714854e-08, "logits/chosen": -2.2706379890441895, "logits/rejected": -2.304987907409668, "logps/chosen": -223.05523681640625, "logps/rejected": -255.6548614501953, "loss": 0.3925, "rewards/accuracies": 0.75, "rewards/chosen": -0.2698545455932617, "rewards/margins": 2.0810041427612305, "rewards/rejected": -2.350858449935913, "step": 7868 }, { "epoch": 0.91, "learning_rate": 2.8268758047524288e-08, "logits/chosen": -3.187497615814209, "logits/rejected": -2.9333109855651855, "logps/chosen": -155.69381713867188, "logps/rejected": -190.18092346191406, "loss": 0.432, "rewards/accuracies": 0.75, "rewards/chosen": 0.15649613738059998, "rewards/margins": 1.3555160760879517, "rewards/rejected": -1.1990197896957397, "step": 7869 }, { "epoch": 0.91, "learning_rate": 2.8233641577900035e-08, "logits/chosen": -3.616450071334839, "logits/rejected": -3.5422778129577637, "logps/chosen": -257.33001708984375, "logps/rejected": -404.42913818359375, "loss": 0.3182, "rewards/accuracies": 0.75, "rewards/chosen": -0.22646081447601318, "rewards/margins": 2.4391775131225586, "rewards/rejected": -2.6656386852264404, "step": 7870 }, { "epoch": 0.91, "learning_rate": 2.8198525108275782e-08, "logits/chosen": -3.0948143005371094, "logits/rejected": -3.099029541015625, "logps/chosen": -182.31103515625, "logps/rejected": -218.478515625, "loss": 0.4845, "rewards/accuracies": 0.875, "rewards/chosen": -0.056755825877189636, "rewards/margins": 0.9922435879707336, "rewards/rejected": -1.0489994287490845, "step": 7871 }, { "epoch": 0.91, "learning_rate": 2.8163408638651526e-08, "logits/chosen": -2.631380081176758, "logits/rejected": -2.7525432109832764, "logps/chosen": -227.43600463867188, "logps/rejected": -293.2517395019531, "loss": 0.5307, "rewards/accuracies": 0.625, "rewards/chosen": -0.8211767077445984, "rewards/margins": 2.4203782081604004, "rewards/rejected": -3.2415549755096436, "step": 7872 }, { "epoch": 0.91, "learning_rate": 2.8128292169027273e-08, "logits/chosen": -3.1462223529815674, "logits/rejected": -2.664015293121338, "logps/chosen": -197.06451416015625, "logps/rejected": -236.86895751953125, "loss": 0.2916, "rewards/accuracies": 0.875, "rewards/chosen": -0.25734424591064453, "rewards/margins": 1.530692458152771, "rewards/rejected": -1.7880367040634155, "step": 7873 }, { "epoch": 0.91, "learning_rate": 2.809317569940302e-08, "logits/chosen": -3.772977352142334, "logits/rejected": -3.680572509765625, "logps/chosen": -233.95852661132812, "logps/rejected": -252.161865234375, "loss": 0.7992, "rewards/accuracies": 0.5, "rewards/chosen": -0.7417940497398376, "rewards/margins": 0.8490232229232788, "rewards/rejected": -1.5908172130584717, "step": 7874 }, { "epoch": 0.91, "learning_rate": 2.8058059229778765e-08, "logits/chosen": -3.300304412841797, "logits/rejected": -3.079775810241699, "logps/chosen": -151.00369262695312, "logps/rejected": -285.8630065917969, "loss": 0.5848, "rewards/accuracies": 0.875, "rewards/chosen": -0.49417024850845337, "rewards/margins": 0.948479413986206, "rewards/rejected": -1.4426497220993042, "step": 7875 }, { "epoch": 0.91, "learning_rate": 2.8022942760154512e-08, "logits/chosen": -3.6560425758361816, "logits/rejected": -3.9088521003723145, "logps/chosen": -202.6317138671875, "logps/rejected": -333.2917175292969, "loss": 0.5155, "rewards/accuracies": 0.75, "rewards/chosen": -0.30315718054771423, "rewards/margins": 3.923092842102051, "rewards/rejected": -4.226250648498535, "step": 7876 }, { "epoch": 0.91, "learning_rate": 2.798782629053026e-08, "logits/chosen": -2.489370584487915, "logits/rejected": -2.486433982849121, "logps/chosen": -263.4953918457031, "logps/rejected": -313.1733093261719, "loss": 0.2255, "rewards/accuracies": 1.0, "rewards/chosen": 0.2138318121433258, "rewards/margins": 2.683227300643921, "rewards/rejected": -2.469395637512207, "step": 7877 }, { "epoch": 0.91, "learning_rate": 2.7952709820906006e-08, "logits/chosen": -2.634549856185913, "logits/rejected": -3.066575765609741, "logps/chosen": -480.6684875488281, "logps/rejected": -320.5803527832031, "loss": 0.3566, "rewards/accuracies": 0.75, "rewards/chosen": 0.01476326584815979, "rewards/margins": 1.9138613939285278, "rewards/rejected": -1.8990981578826904, "step": 7878 }, { "epoch": 0.91, "learning_rate": 2.791759335128175e-08, "logits/chosen": -3.1958718299865723, "logits/rejected": -2.8537871837615967, "logps/chosen": -303.78094482421875, "logps/rejected": -272.33685302734375, "loss": 0.2241, "rewards/accuracies": 1.0, "rewards/chosen": 0.19052666425704956, "rewards/margins": 2.699345111846924, "rewards/rejected": -2.5088186264038086, "step": 7879 }, { "epoch": 0.91, "learning_rate": 2.7882476881657498e-08, "logits/chosen": -3.307584762573242, "logits/rejected": -3.6234841346740723, "logps/chosen": -127.79502868652344, "logps/rejected": -328.2816162109375, "loss": 0.1319, "rewards/accuracies": 1.0, "rewards/chosen": 0.019870907068252563, "rewards/margins": 4.384125232696533, "rewards/rejected": -4.364254474639893, "step": 7880 }, { "epoch": 0.91, "learning_rate": 2.7847360412033245e-08, "logits/chosen": -3.267671823501587, "logits/rejected": -3.2538702487945557, "logps/chosen": -296.01837158203125, "logps/rejected": -295.0340270996094, "loss": 0.9895, "rewards/accuracies": 0.625, "rewards/chosen": -0.38569048047065735, "rewards/margins": 0.42762237787246704, "rewards/rejected": -0.8133128881454468, "step": 7881 }, { "epoch": 0.91, "learning_rate": 2.781224394240899e-08, "logits/chosen": -3.6124510765075684, "logits/rejected": -3.346214771270752, "logps/chosen": -449.00482177734375, "logps/rejected": -227.291748046875, "loss": 0.3481, "rewards/accuracies": 0.75, "rewards/chosen": 0.26432085037231445, "rewards/margins": 2.1947319507598877, "rewards/rejected": -1.9304108619689941, "step": 7882 }, { "epoch": 0.91, "learning_rate": 2.7777127472784736e-08, "logits/chosen": -2.936983346939087, "logits/rejected": -2.981520652770996, "logps/chosen": -175.75306701660156, "logps/rejected": -174.70826721191406, "loss": 0.3671, "rewards/accuracies": 0.75, "rewards/chosen": 0.0986546128988266, "rewards/margins": 1.3175843954086304, "rewards/rejected": -1.2189298868179321, "step": 7883 }, { "epoch": 0.91, "learning_rate": 2.7742011003160483e-08, "logits/chosen": -2.891658306121826, "logits/rejected": -2.9772136211395264, "logps/chosen": -268.66192626953125, "logps/rejected": -497.1708068847656, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": 0.352385938167572, "rewards/margins": 2.681187391281128, "rewards/rejected": -2.3288016319274902, "step": 7884 }, { "epoch": 0.91, "learning_rate": 2.770689453353623e-08, "logits/chosen": -3.0808560848236084, "logits/rejected": -2.975882053375244, "logps/chosen": -271.6749572753906, "logps/rejected": -260.585693359375, "loss": 0.7898, "rewards/accuracies": 0.625, "rewards/chosen": -0.574550986289978, "rewards/margins": 0.7112786769866943, "rewards/rejected": -1.2858296632766724, "step": 7885 }, { "epoch": 0.91, "learning_rate": 2.7671778063911974e-08, "logits/chosen": -3.112577199935913, "logits/rejected": -3.4017016887664795, "logps/chosen": -243.94732666015625, "logps/rejected": -322.32257080078125, "loss": 0.2678, "rewards/accuracies": 0.875, "rewards/chosen": 0.009979836642742157, "rewards/margins": 2.3615102767944336, "rewards/rejected": -2.3515305519104004, "step": 7886 }, { "epoch": 0.91, "learning_rate": 2.7636661594287718e-08, "logits/chosen": -2.8162436485290527, "logits/rejected": -2.987809181213379, "logps/chosen": -258.6773986816406, "logps/rejected": -158.58470153808594, "loss": 0.4371, "rewards/accuracies": 0.875, "rewards/chosen": 0.05953305959701538, "rewards/margins": 1.2956877946853638, "rewards/rejected": -1.2361546754837036, "step": 7887 }, { "epoch": 0.91, "learning_rate": 2.7601545124663465e-08, "logits/chosen": -2.730985403060913, "logits/rejected": -2.709660530090332, "logps/chosen": -235.2060546875, "logps/rejected": -319.4970397949219, "loss": 0.4298, "rewards/accuracies": 0.75, "rewards/chosen": -0.21528835594654083, "rewards/margins": 1.7959524393081665, "rewards/rejected": -2.0112407207489014, "step": 7888 }, { "epoch": 0.91, "learning_rate": 2.756642865503921e-08, "logits/chosen": -2.9345836639404297, "logits/rejected": -2.687929153442383, "logps/chosen": -219.58438110351562, "logps/rejected": -206.67825317382812, "loss": 0.2475, "rewards/accuracies": 0.875, "rewards/chosen": 0.7437154054641724, "rewards/margins": 2.4127626419067383, "rewards/rejected": -1.6690471172332764, "step": 7889 }, { "epoch": 0.91, "learning_rate": 2.7531312185414957e-08, "logits/chosen": -3.2612295150756836, "logits/rejected": -3.310053825378418, "logps/chosen": -144.12173461914062, "logps/rejected": -181.5188751220703, "loss": 0.5019, "rewards/accuracies": 0.75, "rewards/chosen": 0.13782274723052979, "rewards/margins": 0.6889849305152893, "rewards/rejected": -0.5511621832847595, "step": 7890 }, { "epoch": 0.91, "learning_rate": 2.7496195715790704e-08, "logits/chosen": -2.817446708679199, "logits/rejected": -2.868762493133545, "logps/chosen": -220.50408935546875, "logps/rejected": -274.2353515625, "loss": 0.3007, "rewards/accuracies": 0.875, "rewards/chosen": -0.12293568253517151, "rewards/margins": 1.6589994430541992, "rewards/rejected": -1.7819350957870483, "step": 7891 }, { "epoch": 0.91, "learning_rate": 2.7461079246166448e-08, "logits/chosen": -2.9165406227111816, "logits/rejected": -3.0869510173797607, "logps/chosen": -313.349853515625, "logps/rejected": -240.09695434570312, "loss": 0.3508, "rewards/accuracies": 0.75, "rewards/chosen": -0.09044202417135239, "rewards/margins": 1.8668770790100098, "rewards/rejected": -1.9573192596435547, "step": 7892 }, { "epoch": 0.91, "learning_rate": 2.7425962776542195e-08, "logits/chosen": -2.560563087463379, "logits/rejected": -2.470278739929199, "logps/chosen": -244.05540466308594, "logps/rejected": -181.59225463867188, "loss": 0.2634, "rewards/accuracies": 1.0, "rewards/chosen": -0.015650849789381027, "rewards/margins": 1.3314273357391357, "rewards/rejected": -1.3470782041549683, "step": 7893 }, { "epoch": 0.91, "learning_rate": 2.7390846306917942e-08, "logits/chosen": -3.3210389614105225, "logits/rejected": -3.37764310836792, "logps/chosen": -246.11795043945312, "logps/rejected": -286.30517578125, "loss": 0.1285, "rewards/accuracies": 1.0, "rewards/chosen": 0.3584965169429779, "rewards/margins": 2.90488338470459, "rewards/rejected": -2.54638671875, "step": 7894 }, { "epoch": 0.91, "learning_rate": 2.735572983729369e-08, "logits/chosen": -3.4864871501922607, "logits/rejected": -3.3597018718719482, "logps/chosen": -171.92633056640625, "logps/rejected": -192.95787048339844, "loss": 0.4126, "rewards/accuracies": 0.875, "rewards/chosen": 0.07447750866413116, "rewards/margins": 1.434267282485962, "rewards/rejected": -1.3597898483276367, "step": 7895 }, { "epoch": 0.91, "learning_rate": 2.7320613367669433e-08, "logits/chosen": -3.5664775371551514, "logits/rejected": -3.6361324787139893, "logps/chosen": -321.92657470703125, "logps/rejected": -283.3940124511719, "loss": 0.5194, "rewards/accuracies": 0.75, "rewards/chosen": -0.2843543589115143, "rewards/margins": 1.7879265546798706, "rewards/rejected": -2.0722808837890625, "step": 7896 }, { "epoch": 0.91, "learning_rate": 2.728549689804518e-08, "logits/chosen": -2.695937395095825, "logits/rejected": -2.810237407684326, "logps/chosen": -323.49530029296875, "logps/rejected": -237.6997528076172, "loss": 0.3613, "rewards/accuracies": 0.75, "rewards/chosen": 0.11599382013082504, "rewards/margins": 1.2745610475540161, "rewards/rejected": -1.1585673093795776, "step": 7897 }, { "epoch": 0.91, "learning_rate": 2.7250380428420928e-08, "logits/chosen": -3.2684009075164795, "logits/rejected": -3.454017400741577, "logps/chosen": -269.02105712890625, "logps/rejected": -129.60650634765625, "loss": 0.2993, "rewards/accuracies": 0.875, "rewards/chosen": 0.051874756813049316, "rewards/margins": 1.8590662479400635, "rewards/rejected": -1.8071916103363037, "step": 7898 }, { "epoch": 0.91, "learning_rate": 2.7215263958796672e-08, "logits/chosen": -3.5919623374938965, "logits/rejected": -3.2112557888031006, "logps/chosen": -230.43789672851562, "logps/rejected": -209.43055725097656, "loss": 0.6966, "rewards/accuracies": 0.75, "rewards/chosen": -0.5634943842887878, "rewards/margins": 0.733788013458252, "rewards/rejected": -1.2972824573516846, "step": 7899 }, { "epoch": 0.91, "learning_rate": 2.718014748917242e-08, "logits/chosen": -4.027463912963867, "logits/rejected": -3.604475259780884, "logps/chosen": -212.96249389648438, "logps/rejected": -147.77073669433594, "loss": 0.4185, "rewards/accuracies": 0.875, "rewards/chosen": 0.03131598234176636, "rewards/margins": 1.8702412843704224, "rewards/rejected": -1.8389253616333008, "step": 7900 }, { "epoch": 0.91, "learning_rate": 2.7145031019548166e-08, "logits/chosen": -2.8121705055236816, "logits/rejected": -3.056072235107422, "logps/chosen": -302.20208740234375, "logps/rejected": -316.0823974609375, "loss": 0.3486, "rewards/accuracies": 0.875, "rewards/chosen": -0.23209147155284882, "rewards/margins": 2.0709238052368164, "rewards/rejected": -2.3030154705047607, "step": 7901 }, { "epoch": 0.91, "learning_rate": 2.7109914549923914e-08, "logits/chosen": -3.1160337924957275, "logits/rejected": -2.970256805419922, "logps/chosen": -260.2726745605469, "logps/rejected": -256.4873962402344, "loss": 0.2167, "rewards/accuracies": 1.0, "rewards/chosen": -0.5281132459640503, "rewards/margins": 1.973752737045288, "rewards/rejected": -2.501866102218628, "step": 7902 }, { "epoch": 0.91, "learning_rate": 2.7074798080299657e-08, "logits/chosen": -3.4578540325164795, "logits/rejected": -3.471820592880249, "logps/chosen": -274.9444274902344, "logps/rejected": -230.9774627685547, "loss": 0.4355, "rewards/accuracies": 0.875, "rewards/chosen": -0.22982528805732727, "rewards/margins": 2.7048535346984863, "rewards/rejected": -2.934678792953491, "step": 7903 }, { "epoch": 0.91, "learning_rate": 2.7039681610675405e-08, "logits/chosen": -2.650062322616577, "logits/rejected": -2.726612091064453, "logps/chosen": -394.8077697753906, "logps/rejected": -352.046875, "loss": 0.3408, "rewards/accuracies": 0.875, "rewards/chosen": 0.47377437353134155, "rewards/margins": 2.282484531402588, "rewards/rejected": -1.8087100982666016, "step": 7904 }, { "epoch": 0.91, "learning_rate": 2.7004565141051152e-08, "logits/chosen": -3.3896634578704834, "logits/rejected": -3.118875026702881, "logps/chosen": -254.33847045898438, "logps/rejected": -256.6885681152344, "loss": 0.358, "rewards/accuracies": 0.75, "rewards/chosen": 0.3688364624977112, "rewards/margins": 1.7750738859176636, "rewards/rejected": -1.4062374830245972, "step": 7905 }, { "epoch": 0.91, "learning_rate": 2.69694486714269e-08, "logits/chosen": -3.078901767730713, "logits/rejected": -3.3975675106048584, "logps/chosen": -275.5174560546875, "logps/rejected": -150.14845275878906, "loss": 0.6842, "rewards/accuracies": 0.625, "rewards/chosen": -0.5812190175056458, "rewards/margins": 0.6251519322395325, "rewards/rejected": -1.2063709497451782, "step": 7906 }, { "epoch": 0.91, "learning_rate": 2.6934332201802643e-08, "logits/chosen": -3.1360511779785156, "logits/rejected": -3.3958992958068848, "logps/chosen": -385.2325439453125, "logps/rejected": -250.38668823242188, "loss": 0.6845, "rewards/accuracies": 0.75, "rewards/chosen": -0.10780245810747147, "rewards/margins": 0.6432649493217468, "rewards/rejected": -0.7510673999786377, "step": 7907 }, { "epoch": 0.91, "learning_rate": 2.689921573217839e-08, "logits/chosen": -3.8994977474212646, "logits/rejected": -3.429996967315674, "logps/chosen": -276.8229675292969, "logps/rejected": -216.77377319335938, "loss": 0.2155, "rewards/accuracies": 1.0, "rewards/chosen": 0.12388042360544205, "rewards/margins": 2.059345006942749, "rewards/rejected": -1.93546462059021, "step": 7908 }, { "epoch": 0.91, "learning_rate": 2.6864099262554138e-08, "logits/chosen": -2.5549445152282715, "logits/rejected": -2.5712807178497314, "logps/chosen": -260.4879150390625, "logps/rejected": -192.489013671875, "loss": 0.5344, "rewards/accuracies": 0.75, "rewards/chosen": 0.19378383457660675, "rewards/margins": 0.7703691720962524, "rewards/rejected": -0.5765852332115173, "step": 7909 }, { "epoch": 0.91, "learning_rate": 2.682898279292988e-08, "logits/chosen": -2.973583698272705, "logits/rejected": -3.012664318084717, "logps/chosen": -154.38900756835938, "logps/rejected": -270.981201171875, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -0.422580748796463, "rewards/margins": 1.9436957836151123, "rewards/rejected": -2.366276502609253, "step": 7910 }, { "epoch": 0.91, "learning_rate": 2.679386632330563e-08, "logits/chosen": -2.9126954078674316, "logits/rejected": -2.9284260272979736, "logps/chosen": -132.50607299804688, "logps/rejected": -240.022705078125, "loss": 0.5689, "rewards/accuracies": 0.75, "rewards/chosen": -0.40902048349380493, "rewards/margins": 1.454905390739441, "rewards/rejected": -1.8639259338378906, "step": 7911 }, { "epoch": 0.91, "learning_rate": 2.6758749853681376e-08, "logits/chosen": -3.243450164794922, "logits/rejected": -3.0728838443756104, "logps/chosen": -154.16836547851562, "logps/rejected": -214.31362915039062, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": 0.003006555140018463, "rewards/margins": 1.80141282081604, "rewards/rejected": -1.7984063625335693, "step": 7912 }, { "epoch": 0.91, "learning_rate": 2.6723633384057123e-08, "logits/chosen": -3.4315383434295654, "logits/rejected": -3.5121498107910156, "logps/chosen": -465.10430908203125, "logps/rejected": -407.72296142578125, "loss": 0.2166, "rewards/accuracies": 0.875, "rewards/chosen": 0.3533813953399658, "rewards/margins": 2.2711703777313232, "rewards/rejected": -1.9177889823913574, "step": 7913 }, { "epoch": 0.91, "learning_rate": 2.6688516914432867e-08, "logits/chosen": -2.97613787651062, "logits/rejected": -3.162426710128784, "logps/chosen": -623.1483764648438, "logps/rejected": -288.71356201171875, "loss": 0.54, "rewards/accuracies": 0.625, "rewards/chosen": -0.6821424961090088, "rewards/margins": 0.8123636245727539, "rewards/rejected": -1.4945061206817627, "step": 7914 }, { "epoch": 0.91, "learning_rate": 2.6653400444808614e-08, "logits/chosen": -3.4631102085113525, "logits/rejected": -3.1367695331573486, "logps/chosen": -246.68331909179688, "logps/rejected": -260.56341552734375, "loss": 0.1488, "rewards/accuracies": 1.0, "rewards/chosen": 0.197942852973938, "rewards/margins": 3.3274903297424316, "rewards/rejected": -3.129547595977783, "step": 7915 }, { "epoch": 0.91, "learning_rate": 2.661828397518436e-08, "logits/chosen": -3.1834874153137207, "logits/rejected": -3.4566903114318848, "logps/chosen": -265.06097412109375, "logps/rejected": -244.05780029296875, "loss": 0.3483, "rewards/accuracies": 0.875, "rewards/chosen": -0.10610383749008179, "rewards/margins": 1.797206163406372, "rewards/rejected": -1.9033100605010986, "step": 7916 }, { "epoch": 0.91, "learning_rate": 2.6583167505560106e-08, "logits/chosen": -3.790099859237671, "logits/rejected": -3.383934259414673, "logps/chosen": -330.03240966796875, "logps/rejected": -313.65985107421875, "loss": 0.2171, "rewards/accuracies": 1.0, "rewards/chosen": -0.024107974022626877, "rewards/margins": 3.2243809700012207, "rewards/rejected": -3.248488664627075, "step": 7917 }, { "epoch": 0.91, "learning_rate": 2.6548051035935853e-08, "logits/chosen": -3.014209270477295, "logits/rejected": -2.916205406188965, "logps/chosen": -265.4150390625, "logps/rejected": -230.60015869140625, "loss": 0.3307, "rewards/accuracies": 0.75, "rewards/chosen": 0.16199035942554474, "rewards/margins": 2.5398664474487305, "rewards/rejected": -2.3778762817382812, "step": 7918 }, { "epoch": 0.91, "learning_rate": 2.65129345663116e-08, "logits/chosen": -2.4960522651672363, "logits/rejected": -2.6684045791625977, "logps/chosen": -321.713134765625, "logps/rejected": -290.3084716796875, "loss": 0.1681, "rewards/accuracies": 1.0, "rewards/chosen": 0.38368362188339233, "rewards/margins": 2.7201507091522217, "rewards/rejected": -2.3364672660827637, "step": 7919 }, { "epoch": 0.91, "learning_rate": 2.6477818096687347e-08, "logits/chosen": -2.906826972961426, "logits/rejected": -2.8093433380126953, "logps/chosen": -350.14129638671875, "logps/rejected": -262.18115234375, "loss": 0.2332, "rewards/accuracies": 0.875, "rewards/chosen": -0.0791972205042839, "rewards/margins": 2.1768746376037598, "rewards/rejected": -2.2560718059539795, "step": 7920 }, { "epoch": 0.91, "learning_rate": 2.644270162706309e-08, "logits/chosen": -3.1663944721221924, "logits/rejected": -3.010119915008545, "logps/chosen": -218.75100708007812, "logps/rejected": -225.37091064453125, "loss": 0.3221, "rewards/accuracies": 0.75, "rewards/chosen": -0.22143548727035522, "rewards/margins": 2.568321704864502, "rewards/rejected": -2.789757251739502, "step": 7921 }, { "epoch": 0.91, "learning_rate": 2.640758515743884e-08, "logits/chosen": -2.877117872238159, "logits/rejected": -3.0283703804016113, "logps/chosen": -129.39622497558594, "logps/rejected": -255.99874877929688, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": -0.5295476913452148, "rewards/margins": 0.6724177002906799, "rewards/rejected": -1.2019654512405396, "step": 7922 }, { "epoch": 0.91, "learning_rate": 2.6372468687814586e-08, "logits/chosen": -2.7801549434661865, "logits/rejected": -2.735271692276001, "logps/chosen": -249.81961059570312, "logps/rejected": -183.65936279296875, "loss": 0.1418, "rewards/accuracies": 1.0, "rewards/chosen": 0.3297932744026184, "rewards/margins": 2.4324135780334473, "rewards/rejected": -2.1026203632354736, "step": 7923 }, { "epoch": 0.91, "learning_rate": 2.633735221819033e-08, "logits/chosen": -2.4594147205352783, "logits/rejected": -2.4480416774749756, "logps/chosen": -386.9161376953125, "logps/rejected": -285.9794616699219, "loss": 0.5487, "rewards/accuracies": 0.75, "rewards/chosen": -0.2700170874595642, "rewards/margins": 1.8158276081085205, "rewards/rejected": -2.0858447551727295, "step": 7924 }, { "epoch": 0.91, "learning_rate": 2.6302235748566077e-08, "logits/chosen": -3.1329729557037354, "logits/rejected": -3.4244697093963623, "logps/chosen": -399.0824890136719, "logps/rejected": -232.18731689453125, "loss": 0.1938, "rewards/accuracies": 1.0, "rewards/chosen": 0.5136888027191162, "rewards/margins": 2.299452543258667, "rewards/rejected": -1.7857637405395508, "step": 7925 }, { "epoch": 0.91, "learning_rate": 2.6267119278941824e-08, "logits/chosen": -3.303642749786377, "logits/rejected": -3.2839174270629883, "logps/chosen": -151.54495239257812, "logps/rejected": -199.93118286132812, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": 0.6612294316291809, "rewards/margins": 3.2654836177825928, "rewards/rejected": -2.6042542457580566, "step": 7926 }, { "epoch": 0.91, "learning_rate": 2.623200280931757e-08, "logits/chosen": -2.9629931449890137, "logits/rejected": -2.4939796924591064, "logps/chosen": -366.44586181640625, "logps/rejected": -331.01690673828125, "loss": 0.4422, "rewards/accuracies": 0.75, "rewards/chosen": -0.5571781396865845, "rewards/margins": 1.5492804050445557, "rewards/rejected": -2.1064586639404297, "step": 7927 }, { "epoch": 0.91, "learning_rate": 2.6196886339693315e-08, "logits/chosen": -2.859267234802246, "logits/rejected": -2.9977216720581055, "logps/chosen": -460.93475341796875, "logps/rejected": -161.92727661132812, "loss": 0.8193, "rewards/accuracies": 0.375, "rewards/chosen": -0.33396315574645996, "rewards/margins": 0.6467481851577759, "rewards/rejected": -0.9807113409042358, "step": 7928 }, { "epoch": 0.91, "learning_rate": 2.6161769870069063e-08, "logits/chosen": -2.709243059158325, "logits/rejected": -2.981123924255371, "logps/chosen": -266.0618896484375, "logps/rejected": -301.1204528808594, "loss": 0.4513, "rewards/accuracies": 0.75, "rewards/chosen": -0.2805801331996918, "rewards/margins": 1.4517219066619873, "rewards/rejected": -1.732301950454712, "step": 7929 }, { "epoch": 0.91, "learning_rate": 2.612665340044481e-08, "logits/chosen": -2.61173415184021, "logits/rejected": -2.776883125305176, "logps/chosen": -481.8065185546875, "logps/rejected": -298.9688415527344, "loss": 0.2557, "rewards/accuracies": 0.875, "rewards/chosen": 0.2324690818786621, "rewards/margins": 3.3635215759277344, "rewards/rejected": -3.1310524940490723, "step": 7930 }, { "epoch": 0.91, "learning_rate": 2.6091536930820557e-08, "logits/chosen": -3.152933120727539, "logits/rejected": -3.1133902072906494, "logps/chosen": -351.9236145019531, "logps/rejected": -251.25054931640625, "loss": 0.4405, "rewards/accuracies": 0.625, "rewards/chosen": 0.282807856798172, "rewards/margins": 1.6621642112731934, "rewards/rejected": -1.3793562650680542, "step": 7931 }, { "epoch": 0.91, "learning_rate": 2.60564204611963e-08, "logits/chosen": -3.7819626331329346, "logits/rejected": -3.4273838996887207, "logps/chosen": -441.37396240234375, "logps/rejected": -303.412109375, "loss": 0.3585, "rewards/accuracies": 0.875, "rewards/chosen": -0.4512269198894501, "rewards/margins": 2.3583033084869385, "rewards/rejected": -2.809530258178711, "step": 7932 }, { "epoch": 0.91, "learning_rate": 2.6021303991572048e-08, "logits/chosen": -3.1561684608459473, "logits/rejected": -2.8332066535949707, "logps/chosen": -435.2096252441406, "logps/rejected": -362.68121337890625, "loss": 0.185, "rewards/accuracies": 1.0, "rewards/chosen": 0.37212514877319336, "rewards/margins": 2.327362537384033, "rewards/rejected": -1.9552375078201294, "step": 7933 }, { "epoch": 0.91, "learning_rate": 2.5986187521947795e-08, "logits/chosen": -3.251254081726074, "logits/rejected": -3.2636234760284424, "logps/chosen": -163.11953735351562, "logps/rejected": -212.18124389648438, "loss": 0.3818, "rewards/accuracies": 0.875, "rewards/chosen": 0.0008269846439361572, "rewards/margins": 2.405949354171753, "rewards/rejected": -2.4051222801208496, "step": 7934 }, { "epoch": 0.91, "learning_rate": 2.5951071052323536e-08, "logits/chosen": -3.4906299114227295, "logits/rejected": -3.493117094039917, "logps/chosen": -211.20407104492188, "logps/rejected": -162.78024291992188, "loss": 0.344, "rewards/accuracies": 0.75, "rewards/chosen": 0.33041495084762573, "rewards/margins": 1.8704743385314941, "rewards/rejected": -1.5400593280792236, "step": 7935 }, { "epoch": 0.91, "learning_rate": 2.5915954582699283e-08, "logits/chosen": -3.649817943572998, "logits/rejected": -3.2017147541046143, "logps/chosen": -300.1605224609375, "logps/rejected": -173.2865447998047, "loss": 0.3426, "rewards/accuracies": 0.875, "rewards/chosen": 0.6705621480941772, "rewards/margins": 2.455692768096924, "rewards/rejected": -1.785130500793457, "step": 7936 }, { "epoch": 0.91, "learning_rate": 2.588083811307503e-08, "logits/chosen": -3.5021328926086426, "logits/rejected": -3.28826904296875, "logps/chosen": -303.6654357910156, "logps/rejected": -160.97396850585938, "loss": 0.3252, "rewards/accuracies": 0.75, "rewards/chosen": 0.28051722049713135, "rewards/margins": 1.4995503425598145, "rewards/rejected": -1.2190332412719727, "step": 7937 }, { "epoch": 0.92, "learning_rate": 2.5845721643450774e-08, "logits/chosen": -3.2079715728759766, "logits/rejected": -2.870375156402588, "logps/chosen": -261.8691101074219, "logps/rejected": -229.22161865234375, "loss": 0.5288, "rewards/accuracies": 0.75, "rewards/chosen": -0.8226915597915649, "rewards/margins": 0.711804986000061, "rewards/rejected": -1.534496545791626, "step": 7938 }, { "epoch": 0.92, "learning_rate": 2.581060517382652e-08, "logits/chosen": -3.7844393253326416, "logits/rejected": -3.6793339252471924, "logps/chosen": -128.09764099121094, "logps/rejected": -192.2578582763672, "loss": 0.3379, "rewards/accuracies": 0.875, "rewards/chosen": -0.7517159581184387, "rewards/margins": 1.8626763820648193, "rewards/rejected": -2.6143925189971924, "step": 7939 }, { "epoch": 0.92, "learning_rate": 2.577548870420227e-08, "logits/chosen": -3.0112953186035156, "logits/rejected": -2.7731447219848633, "logps/chosen": -261.6444091796875, "logps/rejected": -354.0215148925781, "loss": 0.1826, "rewards/accuracies": 1.0, "rewards/chosen": -0.10158685594797134, "rewards/margins": 3.353416919708252, "rewards/rejected": -3.4550042152404785, "step": 7940 }, { "epoch": 0.92, "learning_rate": 2.5740372234578013e-08, "logits/chosen": -3.112687826156616, "logits/rejected": -2.903775930404663, "logps/chosen": -229.68714904785156, "logps/rejected": -347.0059509277344, "loss": 0.3166, "rewards/accuracies": 1.0, "rewards/chosen": 0.47899460792541504, "rewards/margins": 1.352095127105713, "rewards/rejected": -0.8731005191802979, "step": 7941 }, { "epoch": 0.92, "learning_rate": 2.570525576495376e-08, "logits/chosen": -3.2223713397979736, "logits/rejected": -3.1203532218933105, "logps/chosen": -312.038330078125, "logps/rejected": -299.94873046875, "loss": 0.2391, "rewards/accuracies": 0.875, "rewards/chosen": -0.16063238680362701, "rewards/margins": 2.8604888916015625, "rewards/rejected": -3.0211212635040283, "step": 7942 }, { "epoch": 0.92, "learning_rate": 2.5670139295329507e-08, "logits/chosen": -3.619633674621582, "logits/rejected": -3.680840492248535, "logps/chosen": -353.4227294921875, "logps/rejected": -286.24713134765625, "loss": 0.2338, "rewards/accuracies": 1.0, "rewards/chosen": 0.12400294095277786, "rewards/margins": 2.145894765853882, "rewards/rejected": -2.0218915939331055, "step": 7943 }, { "epoch": 0.92, "learning_rate": 2.5635022825705255e-08, "logits/chosen": -2.846874237060547, "logits/rejected": -3.0241894721984863, "logps/chosen": -282.9507751464844, "logps/rejected": -448.26727294921875, "loss": 0.1688, "rewards/accuracies": 0.875, "rewards/chosen": 0.08168859779834747, "rewards/margins": 2.887544631958008, "rewards/rejected": -2.805856227874756, "step": 7944 }, { "epoch": 0.92, "learning_rate": 2.5599906356081e-08, "logits/chosen": -2.7325515747070312, "logits/rejected": -2.809171199798584, "logps/chosen": -258.73052978515625, "logps/rejected": -252.80880737304688, "loss": 0.6399, "rewards/accuracies": 0.625, "rewards/chosen": -0.9340596795082092, "rewards/margins": 2.030808210372925, "rewards/rejected": -2.9648678302764893, "step": 7945 }, { "epoch": 0.92, "learning_rate": 2.5564789886456746e-08, "logits/chosen": -2.9240994453430176, "logits/rejected": -2.8437981605529785, "logps/chosen": -261.47528076171875, "logps/rejected": -274.9072570800781, "loss": 0.3744, "rewards/accuracies": 0.75, "rewards/chosen": -0.15589338541030884, "rewards/margins": 1.4081629514694214, "rewards/rejected": -1.564056396484375, "step": 7946 }, { "epoch": 0.92, "learning_rate": 2.5529673416832493e-08, "logits/chosen": -2.975680112838745, "logits/rejected": -3.029426097869873, "logps/chosen": -323.5231018066406, "logps/rejected": -208.8004150390625, "loss": 0.4295, "rewards/accuracies": 0.75, "rewards/chosen": -0.2919239401817322, "rewards/margins": 1.6094872951507568, "rewards/rejected": -1.9014112949371338, "step": 7947 }, { "epoch": 0.92, "learning_rate": 2.549455694720824e-08, "logits/chosen": -2.8188319206237793, "logits/rejected": -2.863424301147461, "logps/chosen": -251.12132263183594, "logps/rejected": -243.0237579345703, "loss": 0.6118, "rewards/accuracies": 0.625, "rewards/chosen": -0.07408449798822403, "rewards/margins": 0.7702306509017944, "rewards/rejected": -0.8443151712417603, "step": 7948 }, { "epoch": 0.92, "learning_rate": 2.5459440477583984e-08, "logits/chosen": -2.7839198112487793, "logits/rejected": -3.020292043685913, "logps/chosen": -101.5988540649414, "logps/rejected": -95.53126525878906, "loss": 0.3847, "rewards/accuracies": 0.75, "rewards/chosen": -0.006349533796310425, "rewards/margins": 1.4300590753555298, "rewards/rejected": -1.4364086389541626, "step": 7949 }, { "epoch": 0.92, "learning_rate": 2.542432400795973e-08, "logits/chosen": -2.3427493572235107, "logits/rejected": -2.6410818099975586, "logps/chosen": -484.6178894042969, "logps/rejected": -396.8139343261719, "loss": 0.4795, "rewards/accuracies": 0.625, "rewards/chosen": -0.21733418107032776, "rewards/margins": 1.455613374710083, "rewards/rejected": -1.6729474067687988, "step": 7950 }, { "epoch": 0.92, "learning_rate": 2.538920753833548e-08, "logits/chosen": -3.341948986053467, "logits/rejected": -2.842869281768799, "logps/chosen": -307.70416259765625, "logps/rejected": -291.4599304199219, "loss": 0.6554, "rewards/accuracies": 0.75, "rewards/chosen": -0.12628404796123505, "rewards/margins": 1.4938112497329712, "rewards/rejected": -1.6200952529907227, "step": 7951 }, { "epoch": 0.92, "learning_rate": 2.5354091068711222e-08, "logits/chosen": -2.6168055534362793, "logits/rejected": -2.5283749103546143, "logps/chosen": -288.567138671875, "logps/rejected": -361.37353515625, "loss": 0.1747, "rewards/accuracies": 0.875, "rewards/chosen": 0.314666748046875, "rewards/margins": 3.100090265274048, "rewards/rejected": -2.785423755645752, "step": 7952 }, { "epoch": 0.92, "learning_rate": 2.531897459908697e-08, "logits/chosen": -3.1522672176361084, "logits/rejected": -3.123307228088379, "logps/chosen": -206.51446533203125, "logps/rejected": -211.52706909179688, "loss": 0.8763, "rewards/accuracies": 0.5, "rewards/chosen": -0.44226834177970886, "rewards/margins": 0.3003653287887573, "rewards/rejected": -0.7426337003707886, "step": 7953 }, { "epoch": 0.92, "learning_rate": 2.5283858129462717e-08, "logits/chosen": -3.1563916206359863, "logits/rejected": -3.405907392501831, "logps/chosen": -170.27017211914062, "logps/rejected": -213.7570037841797, "loss": 0.5951, "rewards/accuracies": 0.625, "rewards/chosen": -0.26567983627319336, "rewards/margins": 1.003861427307129, "rewards/rejected": -1.2695412635803223, "step": 7954 }, { "epoch": 0.92, "learning_rate": 2.5248741659838464e-08, "logits/chosen": -4.2164530754089355, "logits/rejected": -3.8874258995056152, "logps/chosen": -713.1478881835938, "logps/rejected": -444.2315673828125, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": -0.006337761878967285, "rewards/margins": 3.0571205615997314, "rewards/rejected": -3.0634584426879883, "step": 7955 }, { "epoch": 0.92, "learning_rate": 2.5213625190214208e-08, "logits/chosen": -2.900765895843506, "logits/rejected": -2.9156994819641113, "logps/chosen": -169.92568969726562, "logps/rejected": -287.5469970703125, "loss": 0.1925, "rewards/accuracies": 0.875, "rewards/chosen": -0.1827513575553894, "rewards/margins": 3.9212982654571533, "rewards/rejected": -4.1040496826171875, "step": 7956 }, { "epoch": 0.92, "learning_rate": 2.5178508720589955e-08, "logits/chosen": -3.8262407779693604, "logits/rejected": -3.6091339588165283, "logps/chosen": -354.7514343261719, "logps/rejected": -263.23968505859375, "loss": 0.4057, "rewards/accuracies": 0.875, "rewards/chosen": -0.016332536935806274, "rewards/margins": 1.1269266605377197, "rewards/rejected": -1.1432591676712036, "step": 7957 }, { "epoch": 0.92, "learning_rate": 2.5143392250965703e-08, "logits/chosen": -3.5838255882263184, "logits/rejected": -3.1417033672332764, "logps/chosen": -298.9044494628906, "logps/rejected": -256.43865966796875, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": 0.30571049451828003, "rewards/margins": 3.4428939819335938, "rewards/rejected": -3.13718318939209, "step": 7958 }, { "epoch": 0.92, "learning_rate": 2.5108275781341447e-08, "logits/chosen": -2.2856366634368896, "logits/rejected": -2.7725045680999756, "logps/chosen": -245.8197021484375, "logps/rejected": -316.387451171875, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": 0.20989608764648438, "rewards/margins": 2.463804006576538, "rewards/rejected": -2.2539079189300537, "step": 7959 }, { "epoch": 0.92, "learning_rate": 2.5073159311717194e-08, "logits/chosen": -3.3323707580566406, "logits/rejected": -3.363924741744995, "logps/chosen": -192.63897705078125, "logps/rejected": -204.76016235351562, "loss": 0.7221, "rewards/accuracies": 0.625, "rewards/chosen": -1.297797441482544, "rewards/margins": 0.5280490517616272, "rewards/rejected": -1.8258464336395264, "step": 7960 }, { "epoch": 0.92, "learning_rate": 2.503804284209294e-08, "logits/chosen": -2.785418748855591, "logits/rejected": -2.6745078563690186, "logps/chosen": -452.9596252441406, "logps/rejected": -364.5073547363281, "loss": 0.4622, "rewards/accuracies": 0.75, "rewards/chosen": -0.3088122606277466, "rewards/margins": 1.1348011493682861, "rewards/rejected": -1.4436134099960327, "step": 7961 }, { "epoch": 0.92, "learning_rate": 2.5002926372468688e-08, "logits/chosen": -3.2402262687683105, "logits/rejected": -3.0054268836975098, "logps/chosen": -231.65277099609375, "logps/rejected": -295.12261962890625, "loss": 0.2422, "rewards/accuracies": 1.0, "rewards/chosen": 0.06966429948806763, "rewards/margins": 2.129422426223755, "rewards/rejected": -2.059757947921753, "step": 7962 }, { "epoch": 0.92, "learning_rate": 2.4967809902844432e-08, "logits/chosen": -2.662245512008667, "logits/rejected": -2.740061044692993, "logps/chosen": -320.7571105957031, "logps/rejected": -271.4331359863281, "loss": 0.5355, "rewards/accuracies": 0.625, "rewards/chosen": -0.35642755031585693, "rewards/margins": 1.6217944622039795, "rewards/rejected": -1.9782218933105469, "step": 7963 }, { "epoch": 0.92, "learning_rate": 2.493269343322018e-08, "logits/chosen": -2.7573680877685547, "logits/rejected": -2.5379068851470947, "logps/chosen": -261.2873840332031, "logps/rejected": -280.81829833984375, "loss": 0.5063, "rewards/accuracies": 0.625, "rewards/chosen": 0.36269548535346985, "rewards/margins": 1.0935238599777222, "rewards/rejected": -0.7308283448219299, "step": 7964 }, { "epoch": 0.92, "learning_rate": 2.4897576963595927e-08, "logits/chosen": -2.955077886581421, "logits/rejected": -2.870755434036255, "logps/chosen": -194.1336212158203, "logps/rejected": -288.6918029785156, "loss": 0.2323, "rewards/accuracies": 1.0, "rewards/chosen": 0.42971518635749817, "rewards/margins": 2.7184877395629883, "rewards/rejected": -2.2887725830078125, "step": 7965 }, { "epoch": 0.92, "learning_rate": 2.4862460493971674e-08, "logits/chosen": -3.3210458755493164, "logits/rejected": -3.044792413711548, "logps/chosen": -198.91635131835938, "logps/rejected": -238.81149291992188, "loss": 0.3515, "rewards/accuracies": 0.875, "rewards/chosen": -0.3496764004230499, "rewards/margins": 2.8651082515716553, "rewards/rejected": -3.214784622192383, "step": 7966 }, { "epoch": 0.92, "learning_rate": 2.4827344024347418e-08, "logits/chosen": -2.655428886413574, "logits/rejected": -2.953444242477417, "logps/chosen": -218.8328857421875, "logps/rejected": -219.76229858398438, "loss": 0.3842, "rewards/accuracies": 0.75, "rewards/chosen": 0.08510073274374008, "rewards/margins": 2.2906980514526367, "rewards/rejected": -2.205597400665283, "step": 7967 }, { "epoch": 0.92, "learning_rate": 2.4792227554723165e-08, "logits/chosen": -3.479766368865967, "logits/rejected": -3.4668102264404297, "logps/chosen": -241.31056213378906, "logps/rejected": -283.42034912109375, "loss": 0.41, "rewards/accuracies": 0.75, "rewards/chosen": 0.3198361098766327, "rewards/margins": 2.563183307647705, "rewards/rejected": -2.243346929550171, "step": 7968 }, { "epoch": 0.92, "learning_rate": 2.4757111085098912e-08, "logits/chosen": -3.948929786682129, "logits/rejected": -3.797544479370117, "logps/chosen": -185.3629608154297, "logps/rejected": -249.7896270751953, "loss": 0.4294, "rewards/accuracies": 0.875, "rewards/chosen": -0.3182118535041809, "rewards/margins": 0.8638822436332703, "rewards/rejected": -1.1820940971374512, "step": 7969 }, { "epoch": 0.92, "learning_rate": 2.4721994615474656e-08, "logits/chosen": -2.9249064922332764, "logits/rejected": -3.0677285194396973, "logps/chosen": -287.0469665527344, "logps/rejected": -299.1763916015625, "loss": 0.3716, "rewards/accuracies": 0.875, "rewards/chosen": -0.2654849886894226, "rewards/margins": 2.8185348510742188, "rewards/rejected": -3.084019660949707, "step": 7970 }, { "epoch": 0.92, "learning_rate": 2.4686878145850403e-08, "logits/chosen": -2.758213520050049, "logits/rejected": -2.9135236740112305, "logps/chosen": -304.6352233886719, "logps/rejected": -268.0983581542969, "loss": 0.3049, "rewards/accuracies": 0.875, "rewards/chosen": 0.13260594010353088, "rewards/margins": 1.695177674293518, "rewards/rejected": -1.5625717639923096, "step": 7971 }, { "epoch": 0.92, "learning_rate": 2.465176167622615e-08, "logits/chosen": -2.964590072631836, "logits/rejected": -2.989288806915283, "logps/chosen": -249.7349090576172, "logps/rejected": -193.16195678710938, "loss": 0.4111, "rewards/accuracies": 0.875, "rewards/chosen": -0.1861923187971115, "rewards/margins": 1.887162208557129, "rewards/rejected": -2.073354721069336, "step": 7972 }, { "epoch": 0.92, "learning_rate": 2.4616645206601898e-08, "logits/chosen": -2.770979404449463, "logits/rejected": -2.7881431579589844, "logps/chosen": -433.44921875, "logps/rejected": -349.163330078125, "loss": 0.1828, "rewards/accuracies": 1.0, "rewards/chosen": -0.47925668954849243, "rewards/margins": 2.566767930984497, "rewards/rejected": -3.0460243225097656, "step": 7973 }, { "epoch": 0.92, "learning_rate": 2.4581528736977642e-08, "logits/chosen": -3.7025179862976074, "logits/rejected": -3.5895352363586426, "logps/chosen": -320.334228515625, "logps/rejected": -182.38491821289062, "loss": 0.3893, "rewards/accuracies": 0.625, "rewards/chosen": -0.09739457815885544, "rewards/margins": 2.0112223625183105, "rewards/rejected": -2.108616828918457, "step": 7974 }, { "epoch": 0.92, "learning_rate": 2.454641226735339e-08, "logits/chosen": -3.7679660320281982, "logits/rejected": -3.4078102111816406, "logps/chosen": -352.31549072265625, "logps/rejected": -270.2923278808594, "loss": 0.3361, "rewards/accuracies": 0.875, "rewards/chosen": 0.4060327410697937, "rewards/margins": 3.0678670406341553, "rewards/rejected": -2.661834239959717, "step": 7975 }, { "epoch": 0.92, "learning_rate": 2.4511295797729136e-08, "logits/chosen": -3.1979784965515137, "logits/rejected": -3.0890536308288574, "logps/chosen": -203.72900390625, "logps/rejected": -251.32192993164062, "loss": 0.2217, "rewards/accuracies": 1.0, "rewards/chosen": -0.08005403727293015, "rewards/margins": 1.859372854232788, "rewards/rejected": -1.9394267797470093, "step": 7976 }, { "epoch": 0.92, "learning_rate": 2.447617932810488e-08, "logits/chosen": -2.938526153564453, "logits/rejected": -2.892923593521118, "logps/chosen": -235.83287048339844, "logps/rejected": -259.1390686035156, "loss": 0.8747, "rewards/accuracies": 0.625, "rewards/chosen": -0.6366168856620789, "rewards/margins": 0.8398971557617188, "rewards/rejected": -1.4765139818191528, "step": 7977 }, { "epoch": 0.92, "learning_rate": 2.4441062858480628e-08, "logits/chosen": -3.272444725036621, "logits/rejected": -3.118973970413208, "logps/chosen": -266.59722900390625, "logps/rejected": -224.6721954345703, "loss": 0.4204, "rewards/accuracies": 0.75, "rewards/chosen": -0.38437366485595703, "rewards/margins": 1.6319831609725952, "rewards/rejected": -2.016356945037842, "step": 7978 }, { "epoch": 0.92, "learning_rate": 2.4405946388856375e-08, "logits/chosen": -3.2520971298217773, "logits/rejected": -3.2545907497406006, "logps/chosen": -98.0849380493164, "logps/rejected": -213.66030883789062, "loss": 0.5117, "rewards/accuracies": 0.625, "rewards/chosen": -0.32629820704460144, "rewards/margins": 2.3057079315185547, "rewards/rejected": -2.6320064067840576, "step": 7979 }, { "epoch": 0.92, "learning_rate": 2.4370829919232122e-08, "logits/chosen": -3.9352660179138184, "logits/rejected": -3.9203040599823, "logps/chosen": -385.2655334472656, "logps/rejected": -351.1178894042969, "loss": 0.2015, "rewards/accuracies": 1.0, "rewards/chosen": 0.40106192231178284, "rewards/margins": 1.67147958278656, "rewards/rejected": -1.2704176902770996, "step": 7980 }, { "epoch": 0.92, "learning_rate": 2.4335713449607866e-08, "logits/chosen": -2.601738691329956, "logits/rejected": -2.3341872692108154, "logps/chosen": -256.02923583984375, "logps/rejected": -234.41331481933594, "loss": 0.4783, "rewards/accuracies": 0.75, "rewards/chosen": -0.5755335092544556, "rewards/margins": 0.8575092554092407, "rewards/rejected": -1.4330426454544067, "step": 7981 }, { "epoch": 0.92, "learning_rate": 2.430059697998361e-08, "logits/chosen": -2.8481054306030273, "logits/rejected": -2.89680814743042, "logps/chosen": -112.02164459228516, "logps/rejected": -262.2196044921875, "loss": 0.2213, "rewards/accuracies": 0.75, "rewards/chosen": 0.22728495299816132, "rewards/margins": 3.0434162616729736, "rewards/rejected": -2.816131353378296, "step": 7982 }, { "epoch": 0.92, "learning_rate": 2.4265480510359357e-08, "logits/chosen": -2.837524175643921, "logits/rejected": -2.8734920024871826, "logps/chosen": -137.19505310058594, "logps/rejected": -267.20635986328125, "loss": 0.5607, "rewards/accuracies": 0.625, "rewards/chosen": -0.6975009441375732, "rewards/margins": 1.4645583629608154, "rewards/rejected": -2.1620593070983887, "step": 7983 }, { "epoch": 0.92, "learning_rate": 2.42303640407351e-08, "logits/chosen": -3.722883462905884, "logits/rejected": -3.9376885890960693, "logps/chosen": -156.9109344482422, "logps/rejected": -213.02793884277344, "loss": 0.1845, "rewards/accuracies": 0.875, "rewards/chosen": 0.08633880317211151, "rewards/margins": 3.663761615753174, "rewards/rejected": -3.577422857284546, "step": 7984 }, { "epoch": 0.92, "learning_rate": 2.4195247571110848e-08, "logits/chosen": -3.7101268768310547, "logits/rejected": -3.563126802444458, "logps/chosen": -268.6546630859375, "logps/rejected": -207.15771484375, "loss": 0.3225, "rewards/accuracies": 0.875, "rewards/chosen": 0.2198849320411682, "rewards/margins": 1.6496437788009644, "rewards/rejected": -1.4297587871551514, "step": 7985 }, { "epoch": 0.92, "learning_rate": 2.4160131101486595e-08, "logits/chosen": -2.9743704795837402, "logits/rejected": -3.19992733001709, "logps/chosen": -155.3919219970703, "logps/rejected": -369.80303955078125, "loss": 0.4091, "rewards/accuracies": 0.75, "rewards/chosen": -0.07224000245332718, "rewards/margins": 2.241482734680176, "rewards/rejected": -2.313723087310791, "step": 7986 }, { "epoch": 0.92, "learning_rate": 2.412501463186234e-08, "logits/chosen": -2.914078950881958, "logits/rejected": -3.123157024383545, "logps/chosen": -348.9054260253906, "logps/rejected": -231.1888427734375, "loss": 0.2336, "rewards/accuracies": 1.0, "rewards/chosen": 0.023736730217933655, "rewards/margins": 1.9931539297103882, "rewards/rejected": -1.9694173336029053, "step": 7987 }, { "epoch": 0.92, "learning_rate": 2.4089898162238087e-08, "logits/chosen": -2.8585307598114014, "logits/rejected": -3.3647897243499756, "logps/chosen": -404.0294494628906, "logps/rejected": -353.5616149902344, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": 0.47464534640312195, "rewards/margins": 2.4419124126434326, "rewards/rejected": -1.9672671556472778, "step": 7988 }, { "epoch": 0.92, "learning_rate": 2.4054781692613834e-08, "logits/chosen": -3.0357375144958496, "logits/rejected": -3.2840394973754883, "logps/chosen": -166.8461456298828, "logps/rejected": -311.505615234375, "loss": 0.4262, "rewards/accuracies": 0.75, "rewards/chosen": -0.5038833618164062, "rewards/margins": 2.238184690475464, "rewards/rejected": -2.74206805229187, "step": 7989 }, { "epoch": 0.92, "learning_rate": 2.401966522298958e-08, "logits/chosen": -3.1495141983032227, "logits/rejected": -3.386413335800171, "logps/chosen": -289.9006652832031, "logps/rejected": -412.4649963378906, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": 0.19903138279914856, "rewards/margins": 4.108625888824463, "rewards/rejected": -3.9095945358276367, "step": 7990 }, { "epoch": 0.92, "learning_rate": 2.3984548753365325e-08, "logits/chosen": -3.7406835556030273, "logits/rejected": -3.7937631607055664, "logps/chosen": -189.83917236328125, "logps/rejected": -292.11456298828125, "loss": 0.5547, "rewards/accuracies": 0.625, "rewards/chosen": -0.40531522035598755, "rewards/margins": 2.033177614212036, "rewards/rejected": -2.4384925365448, "step": 7991 }, { "epoch": 0.92, "learning_rate": 2.3949432283741072e-08, "logits/chosen": -3.2771506309509277, "logits/rejected": -2.954336404800415, "logps/chosen": -474.3189697265625, "logps/rejected": -353.4359436035156, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -0.3095828890800476, "rewards/margins": 2.832674741744995, "rewards/rejected": -3.1422576904296875, "step": 7992 }, { "epoch": 0.92, "learning_rate": 2.391431581411682e-08, "logits/chosen": -2.9199979305267334, "logits/rejected": -3.2120838165283203, "logps/chosen": -310.13336181640625, "logps/rejected": -375.8575134277344, "loss": 0.4055, "rewards/accuracies": 0.75, "rewards/chosen": -0.6926270127296448, "rewards/margins": 2.3751206398010254, "rewards/rejected": -3.0677478313446045, "step": 7993 }, { "epoch": 0.92, "learning_rate": 2.3879199344492563e-08, "logits/chosen": -3.2166967391967773, "logits/rejected": -2.9573628902435303, "logps/chosen": -256.3385314941406, "logps/rejected": -257.9768371582031, "loss": 0.1887, "rewards/accuracies": 1.0, "rewards/chosen": 0.4053897261619568, "rewards/margins": 2.7715206146240234, "rewards/rejected": -2.366130828857422, "step": 7994 }, { "epoch": 0.92, "learning_rate": 2.384408287486831e-08, "logits/chosen": -2.755082130432129, "logits/rejected": -2.5797276496887207, "logps/chosen": -163.77833557128906, "logps/rejected": -325.8861389160156, "loss": 0.3245, "rewards/accuracies": 0.75, "rewards/chosen": -0.09722520411014557, "rewards/margins": 2.0924129486083984, "rewards/rejected": -2.1896378993988037, "step": 7995 }, { "epoch": 0.92, "learning_rate": 2.3808966405244058e-08, "logits/chosen": -3.03595232963562, "logits/rejected": -2.835731267929077, "logps/chosen": -184.3993377685547, "logps/rejected": -202.08932495117188, "loss": 0.4698, "rewards/accuracies": 0.75, "rewards/chosen": -0.37215837836265564, "rewards/margins": 1.0346730947494507, "rewards/rejected": -1.4068315029144287, "step": 7996 }, { "epoch": 0.92, "learning_rate": 2.3773849935619805e-08, "logits/chosen": -2.7571933269500732, "logits/rejected": -2.89835262298584, "logps/chosen": -257.661376953125, "logps/rejected": -204.52627563476562, "loss": 0.2315, "rewards/accuracies": 0.875, "rewards/chosen": 0.08006398379802704, "rewards/margins": 1.9561642408370972, "rewards/rejected": -1.8761003017425537, "step": 7997 }, { "epoch": 0.92, "learning_rate": 2.373873346599555e-08, "logits/chosen": -2.554165840148926, "logits/rejected": -2.679136276245117, "logps/chosen": -221.43060302734375, "logps/rejected": -214.6878204345703, "loss": 0.6098, "rewards/accuracies": 0.75, "rewards/chosen": 0.20120036602020264, "rewards/margins": 1.3186014890670776, "rewards/rejected": -1.117401123046875, "step": 7998 }, { "epoch": 0.92, "learning_rate": 2.3703616996371296e-08, "logits/chosen": -2.6791343688964844, "logits/rejected": -2.845362901687622, "logps/chosen": -276.14117431640625, "logps/rejected": -132.26437377929688, "loss": 0.4684, "rewards/accuracies": 0.875, "rewards/chosen": -0.1979052722454071, "rewards/margins": 1.0924736261367798, "rewards/rejected": -1.2903789281845093, "step": 7999 }, { "epoch": 0.92, "learning_rate": 2.3668500526747044e-08, "logits/chosen": -2.697489023208618, "logits/rejected": -2.844036817550659, "logps/chosen": -372.4266357421875, "logps/rejected": -385.4359130859375, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 0.6623721718788147, "rewards/margins": 3.4289071559906006, "rewards/rejected": -2.7665348052978516, "step": 8000 }, { "epoch": 0.92, "eval_logits/chosen": -2.835569143295288, "eval_logits/rejected": -2.797374725341797, "eval_logps/chosen": -293.9241638183594, "eval_logps/rejected": -237.81394958496094, "eval_loss": 0.4283079504966736, "eval_rewards/accuracies": 0.8142856955528259, "eval_rewards/chosen": 0.013080031611025333, "eval_rewards/margins": 1.3602137565612793, "eval_rewards/rejected": -1.347133755683899, "eval_runtime": 32.5613, "eval_samples_per_second": 2.15, "eval_steps_per_second": 1.075, "step": 8000 }, { "epoch": 0.92, "learning_rate": 2.3633384057122787e-08, "logits/chosen": -2.1724233627319336, "logits/rejected": -2.342683792114258, "logps/chosen": -391.67315673828125, "logps/rejected": -212.12399291992188, "loss": 0.5792, "rewards/accuracies": 0.375, "rewards/chosen": 0.3684316873550415, "rewards/margins": 0.7934242486953735, "rewards/rejected": -0.42499256134033203, "step": 8001 }, { "epoch": 0.92, "learning_rate": 2.3598267587498535e-08, "logits/chosen": -2.787339210510254, "logits/rejected": -2.713188409805298, "logps/chosen": -150.95730590820312, "logps/rejected": -132.15199279785156, "loss": 0.2549, "rewards/accuracies": 1.0, "rewards/chosen": -0.09024590998888016, "rewards/margins": 1.5078314542770386, "rewards/rejected": -1.5980772972106934, "step": 8002 }, { "epoch": 0.92, "learning_rate": 2.3563151117874282e-08, "logits/chosen": -2.625744104385376, "logits/rejected": -2.660984754562378, "logps/chosen": -226.54466247558594, "logps/rejected": -196.52415466308594, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": 0.13031230866909027, "rewards/margins": 1.8259893655776978, "rewards/rejected": -1.6956771612167358, "step": 8003 }, { "epoch": 0.92, "learning_rate": 2.352803464825003e-08, "logits/chosen": -2.718841075897217, "logits/rejected": -2.6312150955200195, "logps/chosen": -168.17881774902344, "logps/rejected": -175.49191284179688, "loss": 0.3824, "rewards/accuracies": 0.875, "rewards/chosen": 0.23947195708751678, "rewards/margins": 1.799863338470459, "rewards/rejected": -1.5603913068771362, "step": 8004 }, { "epoch": 0.92, "learning_rate": 2.3492918178625773e-08, "logits/chosen": -3.319441795349121, "logits/rejected": -3.318993330001831, "logps/chosen": -218.38894653320312, "logps/rejected": -277.68115234375, "loss": 0.2527, "rewards/accuracies": 1.0, "rewards/chosen": -0.43785539269447327, "rewards/margins": 1.687408447265625, "rewards/rejected": -2.1252639293670654, "step": 8005 }, { "epoch": 0.92, "learning_rate": 2.345780170900152e-08, "logits/chosen": -2.829005002975464, "logits/rejected": -3.0559656620025635, "logps/chosen": -230.8877410888672, "logps/rejected": -312.5660095214844, "loss": 0.1981, "rewards/accuracies": 0.875, "rewards/chosen": -0.4095618724822998, "rewards/margins": 2.4204623699188232, "rewards/rejected": -2.830024242401123, "step": 8006 }, { "epoch": 0.92, "learning_rate": 2.3422685239377268e-08, "logits/chosen": -3.584613800048828, "logits/rejected": -3.0837960243225098, "logps/chosen": -159.92909240722656, "logps/rejected": -240.91714477539062, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": -0.01420469582080841, "rewards/margins": 1.8061156272888184, "rewards/rejected": -1.8203203678131104, "step": 8007 }, { "epoch": 0.92, "learning_rate": 2.3387568769753015e-08, "logits/chosen": -3.503955841064453, "logits/rejected": -3.535832405090332, "logps/chosen": -312.06927490234375, "logps/rejected": -297.1146240234375, "loss": 0.632, "rewards/accuracies": 0.75, "rewards/chosen": -0.5955910682678223, "rewards/margins": 1.3245437145233154, "rewards/rejected": -1.9201347827911377, "step": 8008 }, { "epoch": 0.92, "learning_rate": 2.335245230012876e-08, "logits/chosen": -2.9475369453430176, "logits/rejected": -2.8640012741088867, "logps/chosen": -182.304931640625, "logps/rejected": -219.90350341796875, "loss": 0.5339, "rewards/accuracies": 0.75, "rewards/chosen": 0.25493237376213074, "rewards/margins": 1.2262051105499268, "rewards/rejected": -0.9712728261947632, "step": 8009 }, { "epoch": 0.92, "learning_rate": 2.3317335830504506e-08, "logits/chosen": -3.0417327880859375, "logits/rejected": -2.9444663524627686, "logps/chosen": -394.12701416015625, "logps/rejected": -278.0802001953125, "loss": 0.8724, "rewards/accuracies": 0.75, "rewards/chosen": -0.48388341069221497, "rewards/margins": 0.43384259939193726, "rewards/rejected": -0.9177259802818298, "step": 8010 }, { "epoch": 0.92, "learning_rate": 2.3282219360880253e-08, "logits/chosen": -3.563889980316162, "logits/rejected": -3.7180380821228027, "logps/chosen": -195.51048278808594, "logps/rejected": -285.7178039550781, "loss": 0.2942, "rewards/accuracies": 0.875, "rewards/chosen": 0.23970991373062134, "rewards/margins": 2.495819091796875, "rewards/rejected": -2.2561092376708984, "step": 8011 }, { "epoch": 0.92, "learning_rate": 2.3247102891255997e-08, "logits/chosen": -3.162048816680908, "logits/rejected": -2.8991172313690186, "logps/chosen": -367.10064697265625, "logps/rejected": -264.2179870605469, "loss": 0.2075, "rewards/accuracies": 0.75, "rewards/chosen": 0.42170393466949463, "rewards/margins": 2.7889981269836426, "rewards/rejected": -2.3672943115234375, "step": 8012 }, { "epoch": 0.92, "learning_rate": 2.3211986421631744e-08, "logits/chosen": -3.4292049407958984, "logits/rejected": -2.944840908050537, "logps/chosen": -233.85409545898438, "logps/rejected": -203.40557861328125, "loss": 0.4091, "rewards/accuracies": 0.625, "rewards/chosen": -0.14242830872535706, "rewards/margins": 1.7398442029953003, "rewards/rejected": -1.882272481918335, "step": 8013 }, { "epoch": 0.92, "learning_rate": 2.3176869952007492e-08, "logits/chosen": -3.7155160903930664, "logits/rejected": -3.73345947265625, "logps/chosen": -248.3722686767578, "logps/rejected": -198.58419799804688, "loss": 0.7253, "rewards/accuracies": 0.625, "rewards/chosen": -0.763799250125885, "rewards/margins": 0.9025464057922363, "rewards/rejected": -1.6663457155227661, "step": 8014 }, { "epoch": 0.92, "learning_rate": 2.314175348238324e-08, "logits/chosen": -3.3617641925811768, "logits/rejected": -3.1111819744110107, "logps/chosen": -368.0428466796875, "logps/rejected": -258.979736328125, "loss": 0.4685, "rewards/accuracies": 0.875, "rewards/chosen": -0.7069573998451233, "rewards/margins": 1.0855486392974854, "rewards/rejected": -1.7925058603286743, "step": 8015 }, { "epoch": 0.92, "learning_rate": 2.3106637012758983e-08, "logits/chosen": -3.7164645195007324, "logits/rejected": -3.54648494720459, "logps/chosen": -235.0008544921875, "logps/rejected": -268.8841552734375, "loss": 0.5422, "rewards/accuracies": 0.75, "rewards/chosen": -0.6305361986160278, "rewards/margins": 1.5419895648956299, "rewards/rejected": -2.1725258827209473, "step": 8016 }, { "epoch": 0.92, "learning_rate": 2.307152054313473e-08, "logits/chosen": -2.90130615234375, "logits/rejected": -2.9186625480651855, "logps/chosen": -454.83905029296875, "logps/rejected": -386.7666015625, "loss": 0.489, "rewards/accuracies": 0.75, "rewards/chosen": 0.03389720618724823, "rewards/margins": 2.1285321712493896, "rewards/rejected": -2.094635009765625, "step": 8017 }, { "epoch": 0.92, "learning_rate": 2.3036404073510477e-08, "logits/chosen": -2.756119728088379, "logits/rejected": -3.0202603340148926, "logps/chosen": -213.91641235351562, "logps/rejected": -235.87088012695312, "loss": 0.338, "rewards/accuracies": 0.875, "rewards/chosen": 0.22496476769447327, "rewards/margins": 2.1691184043884277, "rewards/rejected": -1.9441534280776978, "step": 8018 }, { "epoch": 0.92, "learning_rate": 2.300128760388622e-08, "logits/chosen": -3.0618300437927246, "logits/rejected": -2.9044430255889893, "logps/chosen": -208.32080078125, "logps/rejected": -268.1991882324219, "loss": 0.2605, "rewards/accuracies": 0.875, "rewards/chosen": 0.18411913514137268, "rewards/margins": 2.12042498588562, "rewards/rejected": -1.9363057613372803, "step": 8019 }, { "epoch": 0.92, "learning_rate": 2.296617113426197e-08, "logits/chosen": -3.560091018676758, "logits/rejected": -3.345008373260498, "logps/chosen": -238.9890594482422, "logps/rejected": -189.31939697265625, "loss": 0.7539, "rewards/accuracies": 0.625, "rewards/chosen": -0.49841228127479553, "rewards/margins": 0.1434486210346222, "rewards/rejected": -0.6418609619140625, "step": 8020 }, { "epoch": 0.92, "learning_rate": 2.2931054664637716e-08, "logits/chosen": -2.8054933547973633, "logits/rejected": -3.2866199016571045, "logps/chosen": -250.70785522460938, "logps/rejected": -238.81724548339844, "loss": 0.224, "rewards/accuracies": 1.0, "rewards/chosen": 0.43123799562454224, "rewards/margins": 1.810842514038086, "rewards/rejected": -1.3796045780181885, "step": 8021 }, { "epoch": 0.92, "learning_rate": 2.2895938195013463e-08, "logits/chosen": -3.351930618286133, "logits/rejected": -2.97041392326355, "logps/chosen": -282.8058166503906, "logps/rejected": -162.94004821777344, "loss": 0.3839, "rewards/accuracies": 0.75, "rewards/chosen": -0.22391116619110107, "rewards/margins": 1.2494418621063232, "rewards/rejected": -1.4733531475067139, "step": 8022 }, { "epoch": 0.92, "learning_rate": 2.2860821725389207e-08, "logits/chosen": -2.666412115097046, "logits/rejected": -2.5294978618621826, "logps/chosen": -328.77734375, "logps/rejected": -241.4656219482422, "loss": 0.3732, "rewards/accuracies": 0.875, "rewards/chosen": 0.0010636448860168457, "rewards/margins": 1.3246134519577026, "rewards/rejected": -1.323549747467041, "step": 8023 }, { "epoch": 0.93, "learning_rate": 2.2825705255764954e-08, "logits/chosen": -2.7709925174713135, "logits/rejected": -2.6102023124694824, "logps/chosen": -219.91946411132812, "logps/rejected": -303.8999938964844, "loss": 0.6264, "rewards/accuracies": 0.75, "rewards/chosen": -0.6625933647155762, "rewards/margins": 0.41801536083221436, "rewards/rejected": -1.0806087255477905, "step": 8024 }, { "epoch": 0.93, "learning_rate": 2.27905887861407e-08, "logits/chosen": -3.0734310150146484, "logits/rejected": -3.0897393226623535, "logps/chosen": -252.2906951904297, "logps/rejected": -326.469482421875, "loss": 0.4862, "rewards/accuracies": 0.75, "rewards/chosen": 0.19562888145446777, "rewards/margins": 1.5324032306671143, "rewards/rejected": -1.336774230003357, "step": 8025 }, { "epoch": 0.93, "learning_rate": 2.275547231651645e-08, "logits/chosen": -3.0480473041534424, "logits/rejected": -3.040666103363037, "logps/chosen": -117.35163116455078, "logps/rejected": -221.1124267578125, "loss": 0.5414, "rewards/accuracies": 0.75, "rewards/chosen": -0.3586985468864441, "rewards/margins": 1.298027753829956, "rewards/rejected": -1.656726360321045, "step": 8026 }, { "epoch": 0.93, "learning_rate": 2.2720355846892193e-08, "logits/chosen": -2.9954309463500977, "logits/rejected": -3.0361976623535156, "logps/chosen": -299.86187744140625, "logps/rejected": -261.8692321777344, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": 0.0953797921538353, "rewards/margins": 2.121105670928955, "rewards/rejected": -2.025725841522217, "step": 8027 }, { "epoch": 0.93, "learning_rate": 2.268523937726794e-08, "logits/chosen": -3.0802016258239746, "logits/rejected": -3.1498680114746094, "logps/chosen": -201.27334594726562, "logps/rejected": -263.4569091796875, "loss": 0.6643, "rewards/accuracies": 0.875, "rewards/chosen": -0.20887836813926697, "rewards/margins": 2.514129400253296, "rewards/rejected": -2.723007917404175, "step": 8028 }, { "epoch": 0.93, "learning_rate": 2.2650122907643687e-08, "logits/chosen": -2.7074484825134277, "logits/rejected": -2.3851118087768555, "logps/chosen": -160.68678283691406, "logps/rejected": -190.7713165283203, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -0.5415672659873962, "rewards/margins": 0.8958225250244141, "rewards/rejected": -1.437389850616455, "step": 8029 }, { "epoch": 0.93, "learning_rate": 2.2615006438019428e-08, "logits/chosen": -2.3639674186706543, "logits/rejected": -2.484079360961914, "logps/chosen": -432.4842834472656, "logps/rejected": -277.243896484375, "loss": 0.3369, "rewards/accuracies": 1.0, "rewards/chosen": 0.43280091881752014, "rewards/margins": 1.264365553855896, "rewards/rejected": -0.8315646648406982, "step": 8030 }, { "epoch": 0.93, "learning_rate": 2.2579889968395175e-08, "logits/chosen": -3.488673686981201, "logits/rejected": -3.0228750705718994, "logps/chosen": -225.40988159179688, "logps/rejected": -141.59994506835938, "loss": 0.3408, "rewards/accuracies": 0.75, "rewards/chosen": 0.14891758561134338, "rewards/margins": 1.5525624752044678, "rewards/rejected": -1.4036449193954468, "step": 8031 }, { "epoch": 0.93, "learning_rate": 2.2544773498770922e-08, "logits/chosen": -2.9155263900756836, "logits/rejected": -3.0366387367248535, "logps/chosen": -96.57861328125, "logps/rejected": -200.00270080566406, "loss": 0.3303, "rewards/accuracies": 0.75, "rewards/chosen": 0.010015420615673065, "rewards/margins": 2.8856685161590576, "rewards/rejected": -2.8756532669067383, "step": 8032 }, { "epoch": 0.93, "learning_rate": 2.2509657029146666e-08, "logits/chosen": -3.1655402183532715, "logits/rejected": -2.820721387863159, "logps/chosen": -304.3794250488281, "logps/rejected": -327.476318359375, "loss": 0.4273, "rewards/accuracies": 0.75, "rewards/chosen": 0.042627573013305664, "rewards/margins": 2.0305325984954834, "rewards/rejected": -1.9879051446914673, "step": 8033 }, { "epoch": 0.93, "learning_rate": 2.2474540559522413e-08, "logits/chosen": -2.6612465381622314, "logits/rejected": -2.5630931854248047, "logps/chosen": -392.0572204589844, "logps/rejected": -321.8609619140625, "loss": 0.5964, "rewards/accuracies": 0.75, "rewards/chosen": -0.017085429280996323, "rewards/margins": 0.6436210870742798, "rewards/rejected": -0.6607065200805664, "step": 8034 }, { "epoch": 0.93, "learning_rate": 2.243942408989816e-08, "logits/chosen": -3.4162697792053223, "logits/rejected": -3.5580239295959473, "logps/chosen": -291.88970947265625, "logps/rejected": -353.3874816894531, "loss": 0.2998, "rewards/accuracies": 0.875, "rewards/chosen": -0.031783148646354675, "rewards/margins": 2.3157076835632324, "rewards/rejected": -2.3474907875061035, "step": 8035 }, { "epoch": 0.93, "learning_rate": 2.2404307620273904e-08, "logits/chosen": -2.3144588470458984, "logits/rejected": -2.3785009384155273, "logps/chosen": -211.83187866210938, "logps/rejected": -163.4004669189453, "loss": 0.5268, "rewards/accuracies": 0.875, "rewards/chosen": 0.14751967787742615, "rewards/margins": 0.7623153328895569, "rewards/rejected": -0.6147956252098083, "step": 8036 }, { "epoch": 0.93, "learning_rate": 2.236919115064965e-08, "logits/chosen": -3.133043050765991, "logits/rejected": -3.0839285850524902, "logps/chosen": -192.89895629882812, "logps/rejected": -229.5494384765625, "loss": 0.2501, "rewards/accuracies": 1.0, "rewards/chosen": -0.02995244413614273, "rewards/margins": 1.8407917022705078, "rewards/rejected": -1.8707441091537476, "step": 8037 }, { "epoch": 0.93, "learning_rate": 2.23340746810254e-08, "logits/chosen": -2.7742860317230225, "logits/rejected": -2.934805154800415, "logps/chosen": -291.8312072753906, "logps/rejected": -278.15216064453125, "loss": 0.2476, "rewards/accuracies": 0.875, "rewards/chosen": 0.0674750953912735, "rewards/margins": 2.666072368621826, "rewards/rejected": -2.598597288131714, "step": 8038 }, { "epoch": 0.93, "learning_rate": 2.2298958211401146e-08, "logits/chosen": -2.376309871673584, "logits/rejected": -2.438676118850708, "logps/chosen": -411.384033203125, "logps/rejected": -209.94436645507812, "loss": 0.2775, "rewards/accuracies": 0.875, "rewards/chosen": -0.05738034099340439, "rewards/margins": 1.6633398532867432, "rewards/rejected": -1.7207201719284058, "step": 8039 }, { "epoch": 0.93, "learning_rate": 2.226384174177689e-08, "logits/chosen": -2.9948127269744873, "logits/rejected": -2.6520228385925293, "logps/chosen": -196.4091033935547, "logps/rejected": -182.95570373535156, "loss": 0.2888, "rewards/accuracies": 1.0, "rewards/chosen": 0.14376959204673767, "rewards/margins": 1.2831405401229858, "rewards/rejected": -1.1393709182739258, "step": 8040 }, { "epoch": 0.93, "learning_rate": 2.2228725272152637e-08, "logits/chosen": -3.231226682662964, "logits/rejected": -2.991774082183838, "logps/chosen": -365.33984375, "logps/rejected": -270.27960205078125, "loss": 0.3149, "rewards/accuracies": 1.0, "rewards/chosen": -0.29492223262786865, "rewards/margins": 1.255335807800293, "rewards/rejected": -1.550257921218872, "step": 8041 }, { "epoch": 0.93, "learning_rate": 2.2193608802528385e-08, "logits/chosen": -3.7253496646881104, "logits/rejected": -3.6466448307037354, "logps/chosen": -204.97171020507812, "logps/rejected": -228.76583862304688, "loss": 0.285, "rewards/accuracies": 0.875, "rewards/chosen": 0.6465979218482971, "rewards/margins": 2.571103811264038, "rewards/rejected": -1.9245059490203857, "step": 8042 }, { "epoch": 0.93, "learning_rate": 2.2158492332904132e-08, "logits/chosen": -2.6136341094970703, "logits/rejected": -2.774787425994873, "logps/chosen": -292.97222900390625, "logps/rejected": -322.89801025390625, "loss": 0.5204, "rewards/accuracies": 0.5, "rewards/chosen": -0.4007762670516968, "rewards/margins": 1.4480220079421997, "rewards/rejected": -1.8487982749938965, "step": 8043 }, { "epoch": 0.93, "learning_rate": 2.2123375863279876e-08, "logits/chosen": -2.6222429275512695, "logits/rejected": -2.6073966026306152, "logps/chosen": -249.88442993164062, "logps/rejected": -206.62930297851562, "loss": 0.6687, "rewards/accuracies": 0.875, "rewards/chosen": 0.47415265440940857, "rewards/margins": 1.5394659042358398, "rewards/rejected": -1.0653133392333984, "step": 8044 }, { "epoch": 0.93, "learning_rate": 2.2088259393655623e-08, "logits/chosen": -3.178874969482422, "logits/rejected": -3.1844992637634277, "logps/chosen": -269.6679992675781, "logps/rejected": -370.9991760253906, "loss": 0.3731, "rewards/accuracies": 0.875, "rewards/chosen": -0.11180319637060165, "rewards/margins": 2.7016475200653076, "rewards/rejected": -2.813450574874878, "step": 8045 }, { "epoch": 0.93, "learning_rate": 2.205314292403137e-08, "logits/chosen": -3.90211820602417, "logits/rejected": -3.7662229537963867, "logps/chosen": -168.53150939941406, "logps/rejected": -203.20297241210938, "loss": 0.464, "rewards/accuracies": 0.75, "rewards/chosen": -0.19040226936340332, "rewards/margins": 1.8774058818817139, "rewards/rejected": -2.067808151245117, "step": 8046 }, { "epoch": 0.93, "learning_rate": 2.2018026454407114e-08, "logits/chosen": -2.7487173080444336, "logits/rejected": -2.5938825607299805, "logps/chosen": -310.8316345214844, "logps/rejected": -251.44712829589844, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": 0.4507037401199341, "rewards/margins": 3.3422939777374268, "rewards/rejected": -2.8915903568267822, "step": 8047 }, { "epoch": 0.93, "learning_rate": 2.198290998478286e-08, "logits/chosen": -2.4923648834228516, "logits/rejected": -2.4849119186401367, "logps/chosen": -249.54673767089844, "logps/rejected": -371.259521484375, "loss": 0.3945, "rewards/accuracies": 0.75, "rewards/chosen": 0.34374839067459106, "rewards/margins": 1.7309404611587524, "rewards/rejected": -1.3871921300888062, "step": 8048 }, { "epoch": 0.93, "learning_rate": 2.194779351515861e-08, "logits/chosen": -3.4455666542053223, "logits/rejected": -3.2057583332061768, "logps/chosen": -380.2109680175781, "logps/rejected": -319.1497802734375, "loss": 0.2471, "rewards/accuracies": 0.875, "rewards/chosen": 0.554500937461853, "rewards/margins": 1.674023985862732, "rewards/rejected": -1.119523048400879, "step": 8049 }, { "epoch": 0.93, "learning_rate": 2.1912677045534356e-08, "logits/chosen": -3.203096866607666, "logits/rejected": -2.894627332687378, "logps/chosen": -220.1932373046875, "logps/rejected": -253.53416442871094, "loss": 0.4163, "rewards/accuracies": 0.75, "rewards/chosen": -0.37024858593940735, "rewards/margins": 1.0915600061416626, "rewards/rejected": -1.4618085622787476, "step": 8050 }, { "epoch": 0.93, "learning_rate": 2.18775605759101e-08, "logits/chosen": -3.371891736984253, "logits/rejected": -3.72292160987854, "logps/chosen": -145.913330078125, "logps/rejected": -273.21099853515625, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -0.5463504791259766, "rewards/margins": 1.7948492765426636, "rewards/rejected": -2.3411996364593506, "step": 8051 }, { "epoch": 0.93, "learning_rate": 2.1842444106285847e-08, "logits/chosen": -2.977965831756592, "logits/rejected": -2.79372239112854, "logps/chosen": -504.9857177734375, "logps/rejected": -382.57952880859375, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -0.6529496908187866, "rewards/margins": 0.35635823011398315, "rewards/rejected": -1.009307861328125, "step": 8052 }, { "epoch": 0.93, "learning_rate": 2.1807327636661594e-08, "logits/chosen": -2.5472304821014404, "logits/rejected": -2.92918062210083, "logps/chosen": -306.8487548828125, "logps/rejected": -284.9100036621094, "loss": 0.4324, "rewards/accuracies": 0.875, "rewards/chosen": -0.2854974567890167, "rewards/margins": 1.710404396057129, "rewards/rejected": -1.9959018230438232, "step": 8053 }, { "epoch": 0.93, "learning_rate": 2.1772211167037338e-08, "logits/chosen": -2.8351669311523438, "logits/rejected": -2.8565452098846436, "logps/chosen": -134.96197509765625, "logps/rejected": -220.5603790283203, "loss": 0.436, "rewards/accuracies": 0.75, "rewards/chosen": 0.4804842174053192, "rewards/margins": 2.1051108837127686, "rewards/rejected": -1.6246265172958374, "step": 8054 }, { "epoch": 0.93, "learning_rate": 2.1737094697413085e-08, "logits/chosen": -2.892791986465454, "logits/rejected": -3.168473958969116, "logps/chosen": -275.5835266113281, "logps/rejected": -234.11143493652344, "loss": 0.3464, "rewards/accuracies": 0.875, "rewards/chosen": -0.362690806388855, "rewards/margins": 1.894276738166809, "rewards/rejected": -2.256967544555664, "step": 8055 }, { "epoch": 0.93, "learning_rate": 2.1701978227788833e-08, "logits/chosen": -3.3889451026916504, "logits/rejected": -3.223766565322876, "logps/chosen": -291.94903564453125, "logps/rejected": -216.06309509277344, "loss": 0.4423, "rewards/accuracies": 0.625, "rewards/chosen": -0.3739625811576843, "rewards/margins": 1.0975284576416016, "rewards/rejected": -1.4714910984039307, "step": 8056 }, { "epoch": 0.93, "learning_rate": 2.166686175816458e-08, "logits/chosen": -3.1827874183654785, "logits/rejected": -2.876659870147705, "logps/chosen": -283.4848937988281, "logps/rejected": -332.50244140625, "loss": 0.5133, "rewards/accuracies": 0.625, "rewards/chosen": -0.006913580000400543, "rewards/margins": 1.38084876537323, "rewards/rejected": -1.3877623081207275, "step": 8057 }, { "epoch": 0.93, "learning_rate": 2.1631745288540324e-08, "logits/chosen": -3.051818609237671, "logits/rejected": -3.093229293823242, "logps/chosen": -206.67462158203125, "logps/rejected": -335.47442626953125, "loss": 0.2549, "rewards/accuracies": 0.75, "rewards/chosen": 0.2838868200778961, "rewards/margins": 3.458343029022217, "rewards/rejected": -3.1744561195373535, "step": 8058 }, { "epoch": 0.93, "learning_rate": 2.159662881891607e-08, "logits/chosen": -3.3496854305267334, "logits/rejected": -3.238542079925537, "logps/chosen": -336.114013671875, "logps/rejected": -334.67803955078125, "loss": 0.1295, "rewards/accuracies": 1.0, "rewards/chosen": -0.30358371138572693, "rewards/margins": 3.1701087951660156, "rewards/rejected": -3.4736926555633545, "step": 8059 }, { "epoch": 0.93, "learning_rate": 2.1561512349291818e-08, "logits/chosen": -2.9567649364471436, "logits/rejected": -3.3683412075042725, "logps/chosen": -187.49916076660156, "logps/rejected": -240.76626586914062, "loss": 0.3362, "rewards/accuracies": 0.875, "rewards/chosen": -0.19685910642147064, "rewards/margins": 2.6336238384246826, "rewards/rejected": -2.8304831981658936, "step": 8060 }, { "epoch": 0.93, "learning_rate": 2.1526395879667562e-08, "logits/chosen": -2.126779079437256, "logits/rejected": -2.24544095993042, "logps/chosen": -414.1973876953125, "logps/rejected": -291.379150390625, "loss": 0.2698, "rewards/accuracies": 0.875, "rewards/chosen": 0.18754692375659943, "rewards/margins": 2.70690655708313, "rewards/rejected": -2.519359588623047, "step": 8061 }, { "epoch": 0.93, "learning_rate": 2.149127941004331e-08, "logits/chosen": -3.3584656715393066, "logits/rejected": -3.216667652130127, "logps/chosen": -152.94430541992188, "logps/rejected": -120.81934356689453, "loss": 0.5034, "rewards/accuracies": 0.75, "rewards/chosen": -0.14885786175727844, "rewards/margins": 1.4871370792388916, "rewards/rejected": -1.6359946727752686, "step": 8062 }, { "epoch": 0.93, "learning_rate": 2.1456162940419057e-08, "logits/chosen": -2.7087745666503906, "logits/rejected": -2.960291862487793, "logps/chosen": -152.6139373779297, "logps/rejected": -309.90594482421875, "loss": 0.3045, "rewards/accuracies": 1.0, "rewards/chosen": 0.1740807592868805, "rewards/margins": 1.82393217086792, "rewards/rejected": -1.6498514413833618, "step": 8063 }, { "epoch": 0.93, "learning_rate": 2.1421046470794804e-08, "logits/chosen": -2.5769567489624023, "logits/rejected": -2.454758405685425, "logps/chosen": -323.085693359375, "logps/rejected": -300.901123046875, "loss": 0.5496, "rewards/accuracies": 0.875, "rewards/chosen": -0.36756354570388794, "rewards/margins": 1.2306584119796753, "rewards/rejected": -1.598221778869629, "step": 8064 }, { "epoch": 0.93, "learning_rate": 2.1385930001170548e-08, "logits/chosen": -2.2873916625976562, "logits/rejected": -2.2486572265625, "logps/chosen": -469.1017761230469, "logps/rejected": -356.34124755859375, "loss": 0.3791, "rewards/accuracies": 0.875, "rewards/chosen": -0.014566242694854736, "rewards/margins": 1.237168550491333, "rewards/rejected": -1.251734733581543, "step": 8065 }, { "epoch": 0.93, "learning_rate": 2.1350813531546295e-08, "logits/chosen": -2.975848436355591, "logits/rejected": -2.806783676147461, "logps/chosen": -143.56488037109375, "logps/rejected": -147.34429931640625, "loss": 0.7565, "rewards/accuracies": 0.75, "rewards/chosen": -1.0422065258026123, "rewards/margins": 0.36837247014045715, "rewards/rejected": -1.410578966140747, "step": 8066 }, { "epoch": 0.93, "learning_rate": 2.1315697061922042e-08, "logits/chosen": -2.4749534130096436, "logits/rejected": -2.714411973953247, "logps/chosen": -149.89308166503906, "logps/rejected": -277.4193420410156, "loss": 0.1987, "rewards/accuracies": 0.875, "rewards/chosen": 0.6521108746528625, "rewards/margins": 2.8434157371520996, "rewards/rejected": -2.1913046836853027, "step": 8067 }, { "epoch": 0.93, "learning_rate": 2.128058059229779e-08, "logits/chosen": -3.4942097663879395, "logits/rejected": -3.631436347961426, "logps/chosen": -249.5019073486328, "logps/rejected": -292.50384521484375, "loss": 0.2597, "rewards/accuracies": 0.875, "rewards/chosen": -0.053129106760025024, "rewards/margins": 2.578902006149292, "rewards/rejected": -2.632031202316284, "step": 8068 }, { "epoch": 0.93, "learning_rate": 2.1245464122673533e-08, "logits/chosen": -2.4760751724243164, "logits/rejected": -2.6290132999420166, "logps/chosen": -194.73138427734375, "logps/rejected": -325.412353515625, "loss": 0.3041, "rewards/accuracies": 0.75, "rewards/chosen": -0.030846334993839264, "rewards/margins": 2.0988078117370605, "rewards/rejected": -2.1296539306640625, "step": 8069 }, { "epoch": 0.93, "learning_rate": 2.121034765304928e-08, "logits/chosen": -3.968874931335449, "logits/rejected": -3.7376835346221924, "logps/chosen": -267.50592041015625, "logps/rejected": -242.0328369140625, "loss": 0.3931, "rewards/accuracies": 0.625, "rewards/chosen": 0.05458257719874382, "rewards/margins": 2.5381810665130615, "rewards/rejected": -2.483598470687866, "step": 8070 }, { "epoch": 0.93, "learning_rate": 2.1175231183425028e-08, "logits/chosen": -3.180668830871582, "logits/rejected": -3.17086124420166, "logps/chosen": -268.07586669921875, "logps/rejected": -198.1901092529297, "loss": 0.5335, "rewards/accuracies": 0.875, "rewards/chosen": -0.34602439403533936, "rewards/margins": 2.361846446990967, "rewards/rejected": -2.7078709602355957, "step": 8071 }, { "epoch": 0.93, "learning_rate": 2.1140114713800772e-08, "logits/chosen": -3.199389934539795, "logits/rejected": -3.101529121398926, "logps/chosen": -193.34632873535156, "logps/rejected": -228.3878936767578, "loss": 0.3745, "rewards/accuracies": 0.875, "rewards/chosen": 0.02575865387916565, "rewards/margins": 1.104827880859375, "rewards/rejected": -1.0790691375732422, "step": 8072 }, { "epoch": 0.93, "learning_rate": 2.110499824417652e-08, "logits/chosen": -3.213878870010376, "logits/rejected": -3.296269655227661, "logps/chosen": -147.52151489257812, "logps/rejected": -208.64244079589844, "loss": 0.4546, "rewards/accuracies": 0.875, "rewards/chosen": 0.16033579409122467, "rewards/margins": 2.320171594619751, "rewards/rejected": -2.1598358154296875, "step": 8073 }, { "epoch": 0.93, "learning_rate": 2.1069881774552266e-08, "logits/chosen": -3.5929787158966064, "logits/rejected": -3.459207773208618, "logps/chosen": -154.98812866210938, "logps/rejected": -167.06016540527344, "loss": 0.375, "rewards/accuracies": 0.75, "rewards/chosen": 0.05450374633073807, "rewards/margins": 1.8598337173461914, "rewards/rejected": -1.8053300380706787, "step": 8074 }, { "epoch": 0.93, "learning_rate": 2.1034765304928014e-08, "logits/chosen": -2.794090747833252, "logits/rejected": -2.7147178649902344, "logps/chosen": -429.6033630371094, "logps/rejected": -292.47271728515625, "loss": 0.2548, "rewards/accuracies": 0.875, "rewards/chosen": 0.15033668279647827, "rewards/margins": 2.142580032348633, "rewards/rejected": -1.9922435283660889, "step": 8075 }, { "epoch": 0.93, "learning_rate": 2.0999648835303758e-08, "logits/chosen": -2.8155558109283447, "logits/rejected": -2.667402744293213, "logps/chosen": -436.729736328125, "logps/rejected": -229.29457092285156, "loss": 0.3332, "rewards/accuracies": 0.875, "rewards/chosen": 0.7033935785293579, "rewards/margins": 1.3620812892913818, "rewards/rejected": -0.6586877107620239, "step": 8076 }, { "epoch": 0.93, "learning_rate": 2.0964532365679505e-08, "logits/chosen": -3.4112768173217773, "logits/rejected": -3.194660186767578, "logps/chosen": -348.2955322265625, "logps/rejected": -208.45623779296875, "loss": 0.5913, "rewards/accuracies": 0.625, "rewards/chosen": -0.29935359954833984, "rewards/margins": 0.7482244372367859, "rewards/rejected": -1.0475780963897705, "step": 8077 }, { "epoch": 0.93, "learning_rate": 2.0929415896055245e-08, "logits/chosen": -3.1244821548461914, "logits/rejected": -3.344791889190674, "logps/chosen": -226.32232666015625, "logps/rejected": -245.29757690429688, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": 0.0761437863111496, "rewards/margins": 4.399238586425781, "rewards/rejected": -4.323094844818115, "step": 8078 }, { "epoch": 0.93, "learning_rate": 2.0894299426430993e-08, "logits/chosen": -3.0453269481658936, "logits/rejected": -2.6799395084381104, "logps/chosen": -375.8236083984375, "logps/rejected": -293.3228759765625, "loss": 0.4297, "rewards/accuracies": 0.75, "rewards/chosen": 0.311745822429657, "rewards/margins": 1.4517728090286255, "rewards/rejected": -1.1400269269943237, "step": 8079 }, { "epoch": 0.93, "learning_rate": 2.085918295680674e-08, "logits/chosen": -3.0627384185791016, "logits/rejected": -3.2640278339385986, "logps/chosen": -219.43557739257812, "logps/rejected": -229.38941955566406, "loss": 0.2537, "rewards/accuracies": 0.875, "rewards/chosen": 0.7568104267120361, "rewards/margins": 2.6775851249694824, "rewards/rejected": -1.9207748174667358, "step": 8080 }, { "epoch": 0.93, "learning_rate": 2.0824066487182487e-08, "logits/chosen": -2.988327741622925, "logits/rejected": -2.8853816986083984, "logps/chosen": -246.1224822998047, "logps/rejected": -261.421630859375, "loss": 0.3027, "rewards/accuracies": 0.875, "rewards/chosen": -0.2626824975013733, "rewards/margins": 1.876063585281372, "rewards/rejected": -2.1387460231781006, "step": 8081 }, { "epoch": 0.93, "learning_rate": 2.078895001755823e-08, "logits/chosen": -3.0947728157043457, "logits/rejected": -3.757822036743164, "logps/chosen": -133.91244506835938, "logps/rejected": -247.65306091308594, "loss": 0.3825, "rewards/accuracies": 0.75, "rewards/chosen": -0.3986765146255493, "rewards/margins": 3.112192392349243, "rewards/rejected": -3.510869026184082, "step": 8082 }, { "epoch": 0.93, "learning_rate": 2.0753833547933978e-08, "logits/chosen": -3.8714845180511475, "logits/rejected": -3.9260966777801514, "logps/chosen": -84.74301147460938, "logps/rejected": -111.62326049804688, "loss": 0.249, "rewards/accuracies": 1.0, "rewards/chosen": 0.344917356967926, "rewards/margins": 1.8704187870025635, "rewards/rejected": -1.5255014896392822, "step": 8083 }, { "epoch": 0.93, "learning_rate": 2.0718717078309725e-08, "logits/chosen": -3.135199546813965, "logits/rejected": -3.263073682785034, "logps/chosen": -367.54632568359375, "logps/rejected": -248.56723022460938, "loss": 0.6517, "rewards/accuracies": 0.625, "rewards/chosen": -0.5009723901748657, "rewards/margins": 0.6522146463394165, "rewards/rejected": -1.1531870365142822, "step": 8084 }, { "epoch": 0.93, "learning_rate": 2.0683600608685473e-08, "logits/chosen": -2.941527843475342, "logits/rejected": -2.638857841491699, "logps/chosen": -297.9062805175781, "logps/rejected": -212.77609252929688, "loss": 0.444, "rewards/accuracies": 0.875, "rewards/chosen": -0.28539276123046875, "rewards/margins": 0.8133763670921326, "rewards/rejected": -1.0987690687179565, "step": 8085 }, { "epoch": 0.93, "learning_rate": 2.0648484139061217e-08, "logits/chosen": -2.6733078956604004, "logits/rejected": -2.697108030319214, "logps/chosen": -300.8211669921875, "logps/rejected": -314.420166015625, "loss": 0.4041, "rewards/accuracies": 0.75, "rewards/chosen": -0.5142174363136292, "rewards/margins": 1.8323487043380737, "rewards/rejected": -2.3465662002563477, "step": 8086 }, { "epoch": 0.93, "learning_rate": 2.0613367669436964e-08, "logits/chosen": -3.5505597591400146, "logits/rejected": -3.102961540222168, "logps/chosen": -389.4725036621094, "logps/rejected": -280.70111083984375, "loss": 0.2601, "rewards/accuracies": 0.875, "rewards/chosen": 0.14622654020786285, "rewards/margins": 1.5360004901885986, "rewards/rejected": -1.3897740840911865, "step": 8087 }, { "epoch": 0.93, "learning_rate": 2.057825119981271e-08, "logits/chosen": -3.521099805831909, "logits/rejected": -3.9785842895507812, "logps/chosen": -135.639404296875, "logps/rejected": -276.3324890136719, "loss": 0.4127, "rewards/accuracies": 0.875, "rewards/chosen": 0.05982981622219086, "rewards/margins": 1.9095255136489868, "rewards/rejected": -1.8496955633163452, "step": 8088 }, { "epoch": 0.93, "learning_rate": 2.0543134730188455e-08, "logits/chosen": -3.23769211769104, "logits/rejected": -3.558887004852295, "logps/chosen": -202.42324829101562, "logps/rejected": -384.7553405761719, "loss": 0.2633, "rewards/accuracies": 0.875, "rewards/chosen": 0.22373290359973907, "rewards/margins": 2.626112699508667, "rewards/rejected": -2.4023799896240234, "step": 8089 }, { "epoch": 0.93, "learning_rate": 2.0508018260564202e-08, "logits/chosen": -3.4767751693725586, "logits/rejected": -3.651686906814575, "logps/chosen": -171.30287170410156, "logps/rejected": -231.30938720703125, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": 0.5225474834442139, "rewards/margins": 2.7629446983337402, "rewards/rejected": -2.2403972148895264, "step": 8090 }, { "epoch": 0.93, "learning_rate": 2.047290179093995e-08, "logits/chosen": -3.200641632080078, "logits/rejected": -3.3357455730438232, "logps/chosen": -335.6983642578125, "logps/rejected": -288.73541259765625, "loss": 0.2827, "rewards/accuracies": 0.875, "rewards/chosen": -0.22778654098510742, "rewards/margins": 1.957155704498291, "rewards/rejected": -2.1849424839019775, "step": 8091 }, { "epoch": 0.93, "learning_rate": 2.0437785321315697e-08, "logits/chosen": -2.8114664554595947, "logits/rejected": -2.6173765659332275, "logps/chosen": -153.9854736328125, "logps/rejected": -288.30682373046875, "loss": 0.2699, "rewards/accuracies": 1.0, "rewards/chosen": -0.061629436910152435, "rewards/margins": 2.3366758823394775, "rewards/rejected": -2.3983051776885986, "step": 8092 }, { "epoch": 0.93, "learning_rate": 2.040266885169144e-08, "logits/chosen": -3.4999642372131348, "logits/rejected": -3.2342216968536377, "logps/chosen": -172.46441650390625, "logps/rejected": -189.98513793945312, "loss": 0.3444, "rewards/accuracies": 0.875, "rewards/chosen": 0.058913201093673706, "rewards/margins": 2.567509174346924, "rewards/rejected": -2.508596181869507, "step": 8093 }, { "epoch": 0.93, "learning_rate": 2.0367552382067188e-08, "logits/chosen": -2.0452213287353516, "logits/rejected": -2.1029324531555176, "logps/chosen": -281.57843017578125, "logps/rejected": -230.03387451171875, "loss": 0.4449, "rewards/accuracies": 0.875, "rewards/chosen": -0.12009875476360321, "rewards/margins": 0.9490899443626404, "rewards/rejected": -1.0691885948181152, "step": 8094 }, { "epoch": 0.93, "learning_rate": 2.0332435912442935e-08, "logits/chosen": -3.707587242126465, "logits/rejected": -3.682727575302124, "logps/chosen": -145.97369384765625, "logps/rejected": -205.54718017578125, "loss": 0.2632, "rewards/accuracies": 1.0, "rewards/chosen": -0.22958174347877502, "rewards/margins": 2.3507938385009766, "rewards/rejected": -2.5803754329681396, "step": 8095 }, { "epoch": 0.93, "learning_rate": 2.029731944281868e-08, "logits/chosen": -3.1546547412872314, "logits/rejected": -3.0216550827026367, "logps/chosen": -178.43344116210938, "logps/rejected": -140.3117218017578, "loss": 0.699, "rewards/accuracies": 0.625, "rewards/chosen": -0.8672754764556885, "rewards/margins": 0.5112407207489014, "rewards/rejected": -1.3785161972045898, "step": 8096 }, { "epoch": 0.93, "learning_rate": 2.0262202973194426e-08, "logits/chosen": -2.9222824573516846, "logits/rejected": -2.6594746112823486, "logps/chosen": -402.86126708984375, "logps/rejected": -351.3917541503906, "loss": 0.6282, "rewards/accuracies": 0.625, "rewards/chosen": 0.02136099338531494, "rewards/margins": 1.064349889755249, "rewards/rejected": -1.0429890155792236, "step": 8097 }, { "epoch": 0.93, "learning_rate": 2.0227086503570174e-08, "logits/chosen": -3.1137003898620605, "logits/rejected": -3.1431779861450195, "logps/chosen": -161.5111541748047, "logps/rejected": -203.87982177734375, "loss": 0.4083, "rewards/accuracies": 0.75, "rewards/chosen": -0.49005863070487976, "rewards/margins": 3.1748738288879395, "rewards/rejected": -3.6649322509765625, "step": 8098 }, { "epoch": 0.93, "learning_rate": 2.019197003394592e-08, "logits/chosen": -3.5250937938690186, "logits/rejected": -3.4417107105255127, "logps/chosen": -339.6767578125, "logps/rejected": -265.8301086425781, "loss": 0.394, "rewards/accuracies": 0.75, "rewards/chosen": -0.48561084270477295, "rewards/margins": 2.605225086212158, "rewards/rejected": -3.0908360481262207, "step": 8099 }, { "epoch": 0.93, "learning_rate": 2.0156853564321665e-08, "logits/chosen": -3.436467170715332, "logits/rejected": -3.380082130432129, "logps/chosen": -376.41094970703125, "logps/rejected": -345.2998962402344, "loss": 0.4091, "rewards/accuracies": 0.875, "rewards/chosen": -0.22684729099273682, "rewards/margins": 2.242433547973633, "rewards/rejected": -2.469280481338501, "step": 8100 }, { "epoch": 0.93, "learning_rate": 2.0121737094697412e-08, "logits/chosen": -3.0203166007995605, "logits/rejected": -3.010812759399414, "logps/chosen": -204.81243896484375, "logps/rejected": -286.08551025390625, "loss": 0.3138, "rewards/accuracies": 0.75, "rewards/chosen": -0.04367043077945709, "rewards/margins": 2.403377056121826, "rewards/rejected": -2.4470479488372803, "step": 8101 }, { "epoch": 0.93, "learning_rate": 2.008662062507316e-08, "logits/chosen": -3.7657790184020996, "logits/rejected": -3.7112507820129395, "logps/chosen": -129.5463409423828, "logps/rejected": -232.66021728515625, "loss": 0.222, "rewards/accuracies": 0.875, "rewards/chosen": 0.06256355345249176, "rewards/margins": 3.1622138023376465, "rewards/rejected": -3.0996501445770264, "step": 8102 }, { "epoch": 0.93, "learning_rate": 2.0051504155448903e-08, "logits/chosen": -2.3435287475585938, "logits/rejected": -2.4874625205993652, "logps/chosen": -339.7044372558594, "logps/rejected": -233.93223571777344, "loss": 0.5138, "rewards/accuracies": 0.75, "rewards/chosen": -0.1205572783946991, "rewards/margins": 0.5190112590789795, "rewards/rejected": -0.639568567276001, "step": 8103 }, { "epoch": 0.93, "learning_rate": 2.001638768582465e-08, "logits/chosen": -3.2479965686798096, "logits/rejected": -3.1107118129730225, "logps/chosen": -267.124267578125, "logps/rejected": -195.2169647216797, "loss": 0.3014, "rewards/accuracies": 0.75, "rewards/chosen": 0.23511840403079987, "rewards/margins": 2.248173475265503, "rewards/rejected": -2.0130553245544434, "step": 8104 }, { "epoch": 0.93, "learning_rate": 1.9981271216200398e-08, "logits/chosen": -2.973837375640869, "logits/rejected": -3.0558862686157227, "logps/chosen": -141.24984741210938, "logps/rejected": -218.24215698242188, "loss": 0.3533, "rewards/accuracies": 0.75, "rewards/chosen": 0.5070230960845947, "rewards/margins": 1.8132102489471436, "rewards/rejected": -1.3061871528625488, "step": 8105 }, { "epoch": 0.93, "learning_rate": 1.9946154746576145e-08, "logits/chosen": -3.0318777561187744, "logits/rejected": -3.1058592796325684, "logps/chosen": -142.4993438720703, "logps/rejected": -218.39588928222656, "loss": 0.2766, "rewards/accuracies": 0.875, "rewards/chosen": 0.35413381457328796, "rewards/margins": 2.1445133686065674, "rewards/rejected": -1.7903796434402466, "step": 8106 }, { "epoch": 0.93, "learning_rate": 1.991103827695189e-08, "logits/chosen": -3.576904773712158, "logits/rejected": -3.4405999183654785, "logps/chosen": -249.99359130859375, "logps/rejected": -247.49388122558594, "loss": 0.4778, "rewards/accuracies": 0.75, "rewards/chosen": -0.05463904142379761, "rewards/margins": 1.3230315446853638, "rewards/rejected": -1.3776706457138062, "step": 8107 }, { "epoch": 0.93, "learning_rate": 1.9875921807327636e-08, "logits/chosen": -3.0454511642456055, "logits/rejected": -2.6837806701660156, "logps/chosen": -199.45028686523438, "logps/rejected": -241.3609619140625, "loss": 0.3736, "rewards/accuracies": 0.75, "rewards/chosen": -0.05857644975185394, "rewards/margins": 2.8184494972229004, "rewards/rejected": -2.877026081085205, "step": 8108 }, { "epoch": 0.93, "learning_rate": 1.9840805337703383e-08, "logits/chosen": -3.1020450592041016, "logits/rejected": -3.0849525928497314, "logps/chosen": -325.1348876953125, "logps/rejected": -323.0201416015625, "loss": 0.1639, "rewards/accuracies": 1.0, "rewards/chosen": 0.3762098550796509, "rewards/margins": 2.3277084827423096, "rewards/rejected": -1.9514985084533691, "step": 8109 }, { "epoch": 0.93, "learning_rate": 1.980568886807913e-08, "logits/chosen": -3.2337746620178223, "logits/rejected": -2.676320791244507, "logps/chosen": -261.4246826171875, "logps/rejected": -211.84219360351562, "loss": 0.3063, "rewards/accuracies": 0.875, "rewards/chosen": -0.26545172929763794, "rewards/margins": 1.704857349395752, "rewards/rejected": -1.9703090190887451, "step": 8110 }, { "epoch": 0.94, "learning_rate": 1.9770572398454874e-08, "logits/chosen": -3.0332088470458984, "logits/rejected": -3.1374850273132324, "logps/chosen": -320.1058654785156, "logps/rejected": -205.93597412109375, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": 0.06201007217168808, "rewards/margins": 1.3711251020431519, "rewards/rejected": -1.3091151714324951, "step": 8111 }, { "epoch": 0.94, "learning_rate": 1.9735455928830622e-08, "logits/chosen": -3.255770444869995, "logits/rejected": -3.0535144805908203, "logps/chosen": -232.1608123779297, "logps/rejected": -262.94073486328125, "loss": 0.6199, "rewards/accuracies": 0.75, "rewards/chosen": 0.498000830411911, "rewards/margins": 1.6027576923370361, "rewards/rejected": -1.1047568321228027, "step": 8112 }, { "epoch": 0.94, "learning_rate": 1.970033945920637e-08, "logits/chosen": -3.220871686935425, "logits/rejected": -3.1240410804748535, "logps/chosen": -248.4283447265625, "logps/rejected": -339.1689147949219, "loss": 0.3574, "rewards/accuracies": 0.875, "rewards/chosen": -0.44286996126174927, "rewards/margins": 2.863872766494751, "rewards/rejected": -3.3067429065704346, "step": 8113 }, { "epoch": 0.94, "learning_rate": 1.9665222989582113e-08, "logits/chosen": -2.868803024291992, "logits/rejected": -2.903228282928467, "logps/chosen": -307.3362731933594, "logps/rejected": -438.91668701171875, "loss": 0.1253, "rewards/accuracies": 1.0, "rewards/chosen": 0.671288013458252, "rewards/margins": 3.1667098999023438, "rewards/rejected": -2.495421886444092, "step": 8114 }, { "epoch": 0.94, "learning_rate": 1.963010651995786e-08, "logits/chosen": -3.5724828243255615, "logits/rejected": -3.648022174835205, "logps/chosen": -144.06398010253906, "logps/rejected": -289.49700927734375, "loss": 0.2776, "rewards/accuracies": 0.75, "rewards/chosen": 0.003629133105278015, "rewards/margins": 4.023458480834961, "rewards/rejected": -4.019829750061035, "step": 8115 }, { "epoch": 0.94, "learning_rate": 1.9594990050333607e-08, "logits/chosen": -3.0363411903381348, "logits/rejected": -3.2065887451171875, "logps/chosen": -280.5263671875, "logps/rejected": -224.31756591796875, "loss": 0.4107, "rewards/accuracies": 0.75, "rewards/chosen": -0.25136861205101013, "rewards/margins": 2.149350643157959, "rewards/rejected": -2.400719165802002, "step": 8116 }, { "epoch": 0.94, "learning_rate": 1.9559873580709355e-08, "logits/chosen": -2.7716751098632812, "logits/rejected": -2.7742228507995605, "logps/chosen": -475.25665283203125, "logps/rejected": -302.7544860839844, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": -0.11932497471570969, "rewards/margins": 2.431936502456665, "rewards/rejected": -2.5512616634368896, "step": 8117 }, { "epoch": 0.94, "learning_rate": 1.95247571110851e-08, "logits/chosen": -2.4218568801879883, "logits/rejected": -2.6374778747558594, "logps/chosen": -422.4732971191406, "logps/rejected": -383.747802734375, "loss": 0.3806, "rewards/accuracies": 0.875, "rewards/chosen": -0.6989766359329224, "rewards/margins": 1.4744174480438232, "rewards/rejected": -2.173394203186035, "step": 8118 }, { "epoch": 0.94, "learning_rate": 1.9489640641460846e-08, "logits/chosen": -2.6716842651367188, "logits/rejected": -3.0347440242767334, "logps/chosen": -186.69271850585938, "logps/rejected": -417.3408508300781, "loss": 0.1425, "rewards/accuracies": 0.875, "rewards/chosen": 0.33973628282546997, "rewards/margins": 3.867523193359375, "rewards/rejected": -3.5277867317199707, "step": 8119 }, { "epoch": 0.94, "learning_rate": 1.9454524171836593e-08, "logits/chosen": -2.188133955001831, "logits/rejected": -2.2462518215179443, "logps/chosen": -326.8382873535156, "logps/rejected": -268.86566162109375, "loss": 0.4467, "rewards/accuracies": 0.75, "rewards/chosen": 0.09941811114549637, "rewards/margins": 1.08613920211792, "rewards/rejected": -0.9867210984230042, "step": 8120 }, { "epoch": 0.94, "learning_rate": 1.9419407702212337e-08, "logits/chosen": -3.0630125999450684, "logits/rejected": -2.987393617630005, "logps/chosen": -401.7766418457031, "logps/rejected": -379.81121826171875, "loss": 0.3371, "rewards/accuracies": 0.875, "rewards/chosen": 0.12918640673160553, "rewards/margins": 1.7517932653427124, "rewards/rejected": -1.6226069927215576, "step": 8121 }, { "epoch": 0.94, "learning_rate": 1.9384291232588084e-08, "logits/chosen": -2.826364040374756, "logits/rejected": -2.737515687942505, "logps/chosen": -348.8150634765625, "logps/rejected": -386.28607177734375, "loss": 0.4845, "rewards/accuracies": 0.875, "rewards/chosen": -0.2681174874305725, "rewards/margins": 1.8318108320236206, "rewards/rejected": -2.099928379058838, "step": 8122 }, { "epoch": 0.94, "learning_rate": 1.934917476296383e-08, "logits/chosen": -3.2220664024353027, "logits/rejected": -3.2804388999938965, "logps/chosen": -165.6617431640625, "logps/rejected": -192.40728759765625, "loss": 0.5457, "rewards/accuracies": 0.625, "rewards/chosen": -0.7802593111991882, "rewards/margins": 1.319090723991394, "rewards/rejected": -2.0993499755859375, "step": 8123 }, { "epoch": 0.94, "learning_rate": 1.931405829333958e-08, "logits/chosen": -3.534693479537964, "logits/rejected": -3.292240858078003, "logps/chosen": -218.77340698242188, "logps/rejected": -265.78814697265625, "loss": 0.4012, "rewards/accuracies": 0.75, "rewards/chosen": -0.46384087204933167, "rewards/margins": 2.0452182292938232, "rewards/rejected": -2.509059190750122, "step": 8124 }, { "epoch": 0.94, "learning_rate": 1.9278941823715323e-08, "logits/chosen": -3.7640581130981445, "logits/rejected": -3.467867851257324, "logps/chosen": -385.540283203125, "logps/rejected": -228.18069458007812, "loss": 0.1878, "rewards/accuracies": 1.0, "rewards/chosen": -0.10741977393627167, "rewards/margins": 1.786490559577942, "rewards/rejected": -1.89391028881073, "step": 8125 }, { "epoch": 0.94, "learning_rate": 1.9243825354091066e-08, "logits/chosen": -3.5181193351745605, "logits/rejected": -3.5929291248321533, "logps/chosen": -262.10601806640625, "logps/rejected": -197.092529296875, "loss": 0.3322, "rewards/accuracies": 0.75, "rewards/chosen": -0.31223592162132263, "rewards/margins": 1.8965249061584473, "rewards/rejected": -2.2087607383728027, "step": 8126 }, { "epoch": 0.94, "learning_rate": 1.9208708884466814e-08, "logits/chosen": -3.421161651611328, "logits/rejected": -3.5838851928710938, "logps/chosen": -170.16221618652344, "logps/rejected": -255.75076293945312, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": -0.04456467181444168, "rewards/margins": 3.1398234367370605, "rewards/rejected": -3.1843879222869873, "step": 8127 }, { "epoch": 0.94, "learning_rate": 1.9173592414842558e-08, "logits/chosen": -2.3176236152648926, "logits/rejected": -2.1563918590545654, "logps/chosen": -240.0767822265625, "logps/rejected": -258.974853515625, "loss": 0.6513, "rewards/accuracies": 0.625, "rewards/chosen": -0.2696719169616699, "rewards/margins": 0.33790165185928345, "rewards/rejected": -0.6075736284255981, "step": 8128 }, { "epoch": 0.94, "learning_rate": 1.9138475945218305e-08, "logits/chosen": -2.9639925956726074, "logits/rejected": -2.973402500152588, "logps/chosen": -116.26839447021484, "logps/rejected": -141.46078491210938, "loss": 0.3104, "rewards/accuracies": 0.875, "rewards/chosen": 0.05447874963283539, "rewards/margins": 2.318026542663574, "rewards/rejected": -2.263547897338867, "step": 8129 }, { "epoch": 0.94, "learning_rate": 1.9103359475594052e-08, "logits/chosen": -4.03908634185791, "logits/rejected": -4.012759685516357, "logps/chosen": -289.4230651855469, "logps/rejected": -247.8490447998047, "loss": 0.7654, "rewards/accuracies": 0.5, "rewards/chosen": -0.1473051905632019, "rewards/margins": 0.48383641242980957, "rewards/rejected": -0.6311416029930115, "step": 8130 }, { "epoch": 0.94, "learning_rate": 1.9068243005969796e-08, "logits/chosen": -2.9633941650390625, "logits/rejected": -2.961158514022827, "logps/chosen": -292.8026123046875, "logps/rejected": -254.13916015625, "loss": 0.3559, "rewards/accuracies": 0.875, "rewards/chosen": -0.768979549407959, "rewards/margins": 2.1012661457061768, "rewards/rejected": -2.8702456951141357, "step": 8131 }, { "epoch": 0.94, "learning_rate": 1.9033126536345543e-08, "logits/chosen": -3.38356351852417, "logits/rejected": -3.5959115028381348, "logps/chosen": -177.4767608642578, "logps/rejected": -252.30372619628906, "loss": 0.1878, "rewards/accuracies": 0.875, "rewards/chosen": 0.5052100419998169, "rewards/margins": 3.2442679405212402, "rewards/rejected": -2.739057779312134, "step": 8132 }, { "epoch": 0.94, "learning_rate": 1.899801006672129e-08, "logits/chosen": -3.400052070617676, "logits/rejected": -3.1680917739868164, "logps/chosen": -315.2216491699219, "logps/rejected": -230.8302764892578, "loss": 0.3741, "rewards/accuracies": 0.75, "rewards/chosen": 0.07330694794654846, "rewards/margins": 1.7744859457015991, "rewards/rejected": -1.701179027557373, "step": 8133 }, { "epoch": 0.94, "learning_rate": 1.8962893597097038e-08, "logits/chosen": -2.9619884490966797, "logits/rejected": -2.6542587280273438, "logps/chosen": -175.0492401123047, "logps/rejected": -209.59432983398438, "loss": 0.397, "rewards/accuracies": 0.875, "rewards/chosen": -0.22633367776870728, "rewards/margins": 1.3600375652313232, "rewards/rejected": -1.5863711833953857, "step": 8134 }, { "epoch": 0.94, "learning_rate": 1.8927777127472782e-08, "logits/chosen": -2.736616849899292, "logits/rejected": -2.919780731201172, "logps/chosen": -358.18084716796875, "logps/rejected": -241.42141723632812, "loss": 0.1703, "rewards/accuracies": 0.875, "rewards/chosen": 0.43369653820991516, "rewards/margins": 2.6027069091796875, "rewards/rejected": -2.1690104007720947, "step": 8135 }, { "epoch": 0.94, "learning_rate": 1.889266065784853e-08, "logits/chosen": -3.7158148288726807, "logits/rejected": -4.019084453582764, "logps/chosen": -242.95999145507812, "logps/rejected": -174.6965789794922, "loss": 0.4646, "rewards/accuracies": 0.75, "rewards/chosen": -0.42001909017562866, "rewards/margins": 1.176490306854248, "rewards/rejected": -1.5965094566345215, "step": 8136 }, { "epoch": 0.94, "learning_rate": 1.8857544188224276e-08, "logits/chosen": -2.791288375854492, "logits/rejected": -2.7436184883117676, "logps/chosen": -142.1248779296875, "logps/rejected": -220.20144653320312, "loss": 0.2555, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018380675464868546, "rewards/margins": 2.1187543869018555, "rewards/rejected": -2.1205928325653076, "step": 8137 }, { "epoch": 0.94, "learning_rate": 1.882242771860002e-08, "logits/chosen": -2.984785318374634, "logits/rejected": -2.998809814453125, "logps/chosen": -271.77630615234375, "logps/rejected": -366.782958984375, "loss": 0.57, "rewards/accuracies": 0.75, "rewards/chosen": -0.3985166847705841, "rewards/margins": 2.210306167602539, "rewards/rejected": -2.608822822570801, "step": 8138 }, { "epoch": 0.94, "learning_rate": 1.8787311248975767e-08, "logits/chosen": -3.2184832096099854, "logits/rejected": -3.0143752098083496, "logps/chosen": -224.8107147216797, "logps/rejected": -183.1089630126953, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": 0.11117149144411087, "rewards/margins": 0.9929934144020081, "rewards/rejected": -0.881821870803833, "step": 8139 }, { "epoch": 0.94, "learning_rate": 1.8752194779351515e-08, "logits/chosen": -2.6027615070343018, "logits/rejected": -2.5623464584350586, "logps/chosen": -261.7636413574219, "logps/rejected": -230.25262451171875, "loss": 0.5754, "rewards/accuracies": 0.625, "rewards/chosen": 0.022464334964752197, "rewards/margins": 1.288555383682251, "rewards/rejected": -1.266090989112854, "step": 8140 }, { "epoch": 0.94, "learning_rate": 1.8717078309727262e-08, "logits/chosen": -3.3297548294067383, "logits/rejected": -3.396092653274536, "logps/chosen": -207.54168701171875, "logps/rejected": -185.50167846679688, "loss": 0.2171, "rewards/accuracies": 1.0, "rewards/chosen": 0.09918709099292755, "rewards/margins": 2.2100629806518555, "rewards/rejected": -2.1108756065368652, "step": 8141 }, { "epoch": 0.94, "learning_rate": 1.8681961840103006e-08, "logits/chosen": -2.9688148498535156, "logits/rejected": -2.9483141899108887, "logps/chosen": -300.43670654296875, "logps/rejected": -267.80877685546875, "loss": 0.2533, "rewards/accuracies": 0.875, "rewards/chosen": 0.4452897012233734, "rewards/margins": 2.273124933242798, "rewards/rejected": -1.827835202217102, "step": 8142 }, { "epoch": 0.94, "learning_rate": 1.8646845370478753e-08, "logits/chosen": -3.6129331588745117, "logits/rejected": -3.3116116523742676, "logps/chosen": -294.37054443359375, "logps/rejected": -216.973876953125, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": 0.43749427795410156, "rewards/margins": 2.7185287475585938, "rewards/rejected": -2.281034469604492, "step": 8143 }, { "epoch": 0.94, "learning_rate": 1.86117289008545e-08, "logits/chosen": -2.6731138229370117, "logits/rejected": -2.5181496143341064, "logps/chosen": -322.41900634765625, "logps/rejected": -251.1419677734375, "loss": 0.5493, "rewards/accuracies": 0.75, "rewards/chosen": -0.3432113528251648, "rewards/margins": 1.4536491632461548, "rewards/rejected": -1.7968604564666748, "step": 8144 }, { "epoch": 0.94, "learning_rate": 1.8576612431230247e-08, "logits/chosen": -2.8262109756469727, "logits/rejected": -3.0960910320281982, "logps/chosen": -307.8598327636719, "logps/rejected": -188.70156860351562, "loss": 0.2412, "rewards/accuracies": 1.0, "rewards/chosen": 0.41191503405570984, "rewards/margins": 1.55869722366333, "rewards/rejected": -1.1467821598052979, "step": 8145 }, { "epoch": 0.94, "learning_rate": 1.854149596160599e-08, "logits/chosen": -2.7580485343933105, "logits/rejected": -2.6962392330169678, "logps/chosen": -466.1018981933594, "logps/rejected": -453.7894287109375, "loss": 0.547, "rewards/accuracies": 0.75, "rewards/chosen": -0.17873027920722961, "rewards/margins": 1.3551915884017944, "rewards/rejected": -1.5339218378067017, "step": 8146 }, { "epoch": 0.94, "learning_rate": 1.850637949198174e-08, "logits/chosen": -2.7847390174865723, "logits/rejected": -2.9441752433776855, "logps/chosen": -596.1390380859375, "logps/rejected": -404.5362548828125, "loss": 0.3587, "rewards/accuracies": 0.75, "rewards/chosen": -0.058462899178266525, "rewards/margins": 1.566924810409546, "rewards/rejected": -1.6253876686096191, "step": 8147 }, { "epoch": 0.94, "learning_rate": 1.8471263022357486e-08, "logits/chosen": -2.7857542037963867, "logits/rejected": -2.7415571212768555, "logps/chosen": -279.7179260253906, "logps/rejected": -203.29251098632812, "loss": 0.3486, "rewards/accuracies": 1.0, "rewards/chosen": -0.1678428053855896, "rewards/margins": 1.5028886795043945, "rewards/rejected": -1.670731544494629, "step": 8148 }, { "epoch": 0.94, "learning_rate": 1.843614655273323e-08, "logits/chosen": -2.673872947692871, "logits/rejected": -2.44907283782959, "logps/chosen": -297.9399719238281, "logps/rejected": -185.8231201171875, "loss": 0.418, "rewards/accuracies": 0.75, "rewards/chosen": -0.04347944259643555, "rewards/margins": 1.016410231590271, "rewards/rejected": -1.0598896741867065, "step": 8149 }, { "epoch": 0.94, "learning_rate": 1.8401030083108977e-08, "logits/chosen": -2.6014344692230225, "logits/rejected": -2.564140558242798, "logps/chosen": -155.03762817382812, "logps/rejected": -134.90716552734375, "loss": 0.4227, "rewards/accuracies": 0.75, "rewards/chosen": -0.14859744906425476, "rewards/margins": 1.2212873697280884, "rewards/rejected": -1.369884729385376, "step": 8150 }, { "epoch": 0.94, "learning_rate": 1.8365913613484724e-08, "logits/chosen": -3.5034735202789307, "logits/rejected": -3.323676109313965, "logps/chosen": -161.03878784179688, "logps/rejected": -275.11077880859375, "loss": 0.2628, "rewards/accuracies": 0.875, "rewards/chosen": 0.07247796654701233, "rewards/margins": 2.00203800201416, "rewards/rejected": -1.9295601844787598, "step": 8151 }, { "epoch": 0.94, "learning_rate": 1.833079714386047e-08, "logits/chosen": -2.9243712425231934, "logits/rejected": -2.9721269607543945, "logps/chosen": -201.64712524414062, "logps/rejected": -263.46392822265625, "loss": 0.8039, "rewards/accuracies": 0.625, "rewards/chosen": -0.5323455333709717, "rewards/margins": 0.42537960410118103, "rewards/rejected": -0.9577252268791199, "step": 8152 }, { "epoch": 0.94, "learning_rate": 1.8295680674236215e-08, "logits/chosen": -2.351823091506958, "logits/rejected": -2.4209532737731934, "logps/chosen": -368.16229248046875, "logps/rejected": -300.0963439941406, "loss": 0.2317, "rewards/accuracies": 0.875, "rewards/chosen": 0.40202292799949646, "rewards/margins": 2.6498379707336426, "rewards/rejected": -2.247814893722534, "step": 8153 }, { "epoch": 0.94, "learning_rate": 1.8260564204611963e-08, "logits/chosen": -2.4149374961853027, "logits/rejected": -2.7137088775634766, "logps/chosen": -394.4599914550781, "logps/rejected": -231.90199279785156, "loss": 0.6276, "rewards/accuracies": 0.625, "rewards/chosen": -0.5498688220977783, "rewards/margins": 0.9547281861305237, "rewards/rejected": -1.5045969486236572, "step": 8154 }, { "epoch": 0.94, "learning_rate": 1.822544773498771e-08, "logits/chosen": -2.857023239135742, "logits/rejected": -3.1355719566345215, "logps/chosen": -342.98846435546875, "logps/rejected": -254.0294647216797, "loss": 0.4672, "rewards/accuracies": 0.75, "rewards/chosen": 0.559821605682373, "rewards/margins": 1.3363882303237915, "rewards/rejected": -0.7765663862228394, "step": 8155 }, { "epoch": 0.94, "learning_rate": 1.8190331265363454e-08, "logits/chosen": -2.92746639251709, "logits/rejected": -3.1179733276367188, "logps/chosen": -235.04808044433594, "logps/rejected": -251.31405639648438, "loss": 0.856, "rewards/accuracies": 0.75, "rewards/chosen": -0.46951723098754883, "rewards/margins": 2.4779512882232666, "rewards/rejected": -2.9474685192108154, "step": 8156 }, { "epoch": 0.94, "learning_rate": 1.81552147957392e-08, "logits/chosen": -2.8115758895874023, "logits/rejected": -2.718144655227661, "logps/chosen": -315.5123596191406, "logps/rejected": -231.58616638183594, "loss": 0.809, "rewards/accuracies": 0.625, "rewards/chosen": -0.012514084577560425, "rewards/margins": 0.11164455115795135, "rewards/rejected": -0.12415864318609238, "step": 8157 }, { "epoch": 0.94, "learning_rate": 1.8120098326114948e-08, "logits/chosen": -2.4723544120788574, "logits/rejected": -2.8020567893981934, "logps/chosen": -151.33200073242188, "logps/rejected": -347.4915771484375, "loss": 0.5386, "rewards/accuracies": 0.625, "rewards/chosen": -0.3404717743396759, "rewards/margins": 1.9145132303237915, "rewards/rejected": -2.2549848556518555, "step": 8158 }, { "epoch": 0.94, "learning_rate": 1.8084981856490696e-08, "logits/chosen": -3.5449304580688477, "logits/rejected": -3.2956647872924805, "logps/chosen": -178.89576721191406, "logps/rejected": -149.06817626953125, "loss": 0.4503, "rewards/accuracies": 0.875, "rewards/chosen": -0.45487621426582336, "rewards/margins": 0.9965558052062988, "rewards/rejected": -1.4514319896697998, "step": 8159 }, { "epoch": 0.94, "learning_rate": 1.804986538686644e-08, "logits/chosen": -2.50209641456604, "logits/rejected": -2.763795852661133, "logps/chosen": -127.74301147460938, "logps/rejected": -211.9455108642578, "loss": 0.3722, "rewards/accuracies": 0.875, "rewards/chosen": 0.17317873239517212, "rewards/margins": 1.4406728744506836, "rewards/rejected": -1.2674942016601562, "step": 8160 }, { "epoch": 0.94, "learning_rate": 1.8014748917242183e-08, "logits/chosen": -2.9465103149414062, "logits/rejected": -3.0236661434173584, "logps/chosen": -409.89483642578125, "logps/rejected": -296.695556640625, "loss": 0.2591, "rewards/accuracies": 1.0, "rewards/chosen": 0.48401930928230286, "rewards/margins": 1.836775779724121, "rewards/rejected": -1.352756381034851, "step": 8161 }, { "epoch": 0.94, "learning_rate": 1.797963244761793e-08, "logits/chosen": -2.897360324859619, "logits/rejected": -2.8500776290893555, "logps/chosen": -324.31982421875, "logps/rejected": -337.29498291015625, "loss": 0.1885, "rewards/accuracies": 1.0, "rewards/chosen": 0.6375852823257446, "rewards/margins": 2.9714462757110596, "rewards/rejected": -2.3338608741760254, "step": 8162 }, { "epoch": 0.94, "learning_rate": 1.7944515977993678e-08, "logits/chosen": -3.034310817718506, "logits/rejected": -3.0834858417510986, "logps/chosen": -110.35882568359375, "logps/rejected": -210.45535278320312, "loss": 0.2405, "rewards/accuracies": 0.875, "rewards/chosen": 0.12174283713102341, "rewards/margins": 1.8761634826660156, "rewards/rejected": -1.7544206380844116, "step": 8163 }, { "epoch": 0.94, "learning_rate": 1.7909399508369425e-08, "logits/chosen": -2.7571284770965576, "logits/rejected": -2.7454986572265625, "logps/chosen": -326.7891845703125, "logps/rejected": -257.5616760253906, "loss": 0.3561, "rewards/accuracies": 1.0, "rewards/chosen": -0.25400757789611816, "rewards/margins": 1.0092096328735352, "rewards/rejected": -1.2632172107696533, "step": 8164 }, { "epoch": 0.94, "learning_rate": 1.787428303874517e-08, "logits/chosen": -3.312713146209717, "logits/rejected": -3.6067419052124023, "logps/chosen": -143.99403381347656, "logps/rejected": -285.6132507324219, "loss": 0.5473, "rewards/accuracies": 0.625, "rewards/chosen": -0.5546338558197021, "rewards/margins": 1.685487151145935, "rewards/rejected": -2.2401211261749268, "step": 8165 }, { "epoch": 0.94, "learning_rate": 1.7839166569120916e-08, "logits/chosen": -3.53360652923584, "logits/rejected": -3.6113157272338867, "logps/chosen": -238.14706420898438, "logps/rejected": -166.36260986328125, "loss": 0.3297, "rewards/accuracies": 0.875, "rewards/chosen": 0.361843466758728, "rewards/margins": 1.5868897438049316, "rewards/rejected": -1.2250462770462036, "step": 8166 }, { "epoch": 0.94, "learning_rate": 1.7804050099496663e-08, "logits/chosen": -2.629732370376587, "logits/rejected": -3.026935338973999, "logps/chosen": -219.40087890625, "logps/rejected": -378.3924865722656, "loss": 0.3162, "rewards/accuracies": 0.875, "rewards/chosen": -0.5205529928207397, "rewards/margins": 2.3323090076446533, "rewards/rejected": -2.8528618812561035, "step": 8167 }, { "epoch": 0.94, "learning_rate": 1.7768933629872407e-08, "logits/chosen": -3.10290789604187, "logits/rejected": -3.0232598781585693, "logps/chosen": -322.5345458984375, "logps/rejected": -238.12452697753906, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": 0.18800771236419678, "rewards/margins": 1.5264841318130493, "rewards/rejected": -1.3384764194488525, "step": 8168 }, { "epoch": 0.94, "learning_rate": 1.7733817160248155e-08, "logits/chosen": -3.1708736419677734, "logits/rejected": -3.0764541625976562, "logps/chosen": -200.05615234375, "logps/rejected": -261.7193298339844, "loss": 0.2795, "rewards/accuracies": 0.875, "rewards/chosen": -0.47360551357269287, "rewards/margins": 1.803372859954834, "rewards/rejected": -2.2769784927368164, "step": 8169 }, { "epoch": 0.94, "learning_rate": 1.7698700690623902e-08, "logits/chosen": -2.2533557415008545, "logits/rejected": -2.1916022300720215, "logps/chosen": -301.38616943359375, "logps/rejected": -361.1244201660156, "loss": 0.2665, "rewards/accuracies": 0.875, "rewards/chosen": 0.0018864348530769348, "rewards/margins": 2.1028456687927246, "rewards/rejected": -2.100959062576294, "step": 8170 }, { "epoch": 0.94, "learning_rate": 1.766358422099965e-08, "logits/chosen": -3.0362954139709473, "logits/rejected": -2.822458028793335, "logps/chosen": -392.6993408203125, "logps/rejected": -269.53594970703125, "loss": 0.3256, "rewards/accuracies": 0.875, "rewards/chosen": 0.2884213626384735, "rewards/margins": 2.2868733406066895, "rewards/rejected": -1.9984519481658936, "step": 8171 }, { "epoch": 0.94, "learning_rate": 1.7628467751375393e-08, "logits/chosen": -2.7963531017303467, "logits/rejected": -2.7931082248687744, "logps/chosen": -383.13519287109375, "logps/rejected": -346.6846618652344, "loss": 0.6855, "rewards/accuracies": 0.5, "rewards/chosen": -0.7914069294929504, "rewards/margins": 0.9207128286361694, "rewards/rejected": -1.7121198177337646, "step": 8172 }, { "epoch": 0.94, "learning_rate": 1.759335128175114e-08, "logits/chosen": -3.661818027496338, "logits/rejected": -3.416332960128784, "logps/chosen": -185.7296142578125, "logps/rejected": -202.80609130859375, "loss": 0.4081, "rewards/accuracies": 0.75, "rewards/chosen": -0.07354666292667389, "rewards/margins": 1.349813461303711, "rewards/rejected": -1.4233601093292236, "step": 8173 }, { "epoch": 0.94, "learning_rate": 1.7558234812126888e-08, "logits/chosen": -3.4523391723632812, "logits/rejected": -3.334061861038208, "logps/chosen": -574.772705078125, "logps/rejected": -419.60467529296875, "loss": 0.3704, "rewards/accuracies": 0.875, "rewards/chosen": -0.0849202573299408, "rewards/margins": 1.9473963975906372, "rewards/rejected": -2.0323164463043213, "step": 8174 }, { "epoch": 0.94, "learning_rate": 1.7523118342502635e-08, "logits/chosen": -3.256793260574341, "logits/rejected": -2.9841067790985107, "logps/chosen": -272.07562255859375, "logps/rejected": -221.69589233398438, "loss": 0.9939, "rewards/accuracies": 0.75, "rewards/chosen": -1.089319109916687, "rewards/margins": 0.5585801601409912, "rewards/rejected": -1.6478992700576782, "step": 8175 }, { "epoch": 0.94, "learning_rate": 1.748800187287838e-08, "logits/chosen": -2.5723648071289062, "logits/rejected": -2.6450517177581787, "logps/chosen": -347.0587463378906, "logps/rejected": -333.13818359375, "loss": 0.3151, "rewards/accuracies": 0.75, "rewards/chosen": 0.37328076362609863, "rewards/margins": 1.8505440950393677, "rewards/rejected": -1.477263331413269, "step": 8176 }, { "epoch": 0.94, "learning_rate": 1.7452885403254126e-08, "logits/chosen": -2.684037446975708, "logits/rejected": -2.6587769985198975, "logps/chosen": -500.7362060546875, "logps/rejected": -431.42626953125, "loss": 0.2988, "rewards/accuracies": 0.75, "rewards/chosen": 0.2750774323940277, "rewards/margins": 2.128095865249634, "rewards/rejected": -1.8530184030532837, "step": 8177 }, { "epoch": 0.94, "learning_rate": 1.7417768933629873e-08, "logits/chosen": -3.833390235900879, "logits/rejected": -3.879261016845703, "logps/chosen": -185.5983428955078, "logps/rejected": -222.092041015625, "loss": 0.2668, "rewards/accuracies": 0.875, "rewards/chosen": 0.41515523195266724, "rewards/margins": 1.5582764148712158, "rewards/rejected": -1.1431212425231934, "step": 8178 }, { "epoch": 0.94, "learning_rate": 1.7382652464005617e-08, "logits/chosen": -3.3049867153167725, "logits/rejected": -3.2110989093780518, "logps/chosen": -261.9088134765625, "logps/rejected": -246.26809692382812, "loss": 0.4917, "rewards/accuracies": 0.75, "rewards/chosen": -0.47261250019073486, "rewards/margins": 2.517286539077759, "rewards/rejected": -2.989898920059204, "step": 8179 }, { "epoch": 0.94, "learning_rate": 1.7347535994381364e-08, "logits/chosen": -3.209726095199585, "logits/rejected": -3.222686767578125, "logps/chosen": -209.04864501953125, "logps/rejected": -301.27691650390625, "loss": 0.5382, "rewards/accuracies": 0.75, "rewards/chosen": -0.42358940839767456, "rewards/margins": 1.8804998397827148, "rewards/rejected": -2.304089307785034, "step": 8180 }, { "epoch": 0.94, "learning_rate": 1.731241952475711e-08, "logits/chosen": -2.595937728881836, "logits/rejected": -2.6520159244537354, "logps/chosen": -396.0236511230469, "logps/rejected": -202.24176025390625, "loss": 0.7467, "rewards/accuracies": 0.75, "rewards/chosen": 0.1522451490163803, "rewards/margins": 0.8624707460403442, "rewards/rejected": -0.7102255821228027, "step": 8181 }, { "epoch": 0.94, "learning_rate": 1.727730305513286e-08, "logits/chosen": -3.465639352798462, "logits/rejected": -3.437981128692627, "logps/chosen": -262.19952392578125, "logps/rejected": -211.46751403808594, "loss": 0.1536, "rewards/accuracies": 1.0, "rewards/chosen": -0.060538604855537415, "rewards/margins": 2.710240125656128, "rewards/rejected": -2.7707791328430176, "step": 8182 }, { "epoch": 0.94, "learning_rate": 1.7242186585508603e-08, "logits/chosen": -2.7431929111480713, "logits/rejected": -3.160226821899414, "logps/chosen": -133.91766357421875, "logps/rejected": -138.53933715820312, "loss": 0.3126, "rewards/accuracies": 1.0, "rewards/chosen": 0.03880004957318306, "rewards/margins": 1.5290675163269043, "rewards/rejected": -1.490267276763916, "step": 8183 }, { "epoch": 0.94, "learning_rate": 1.720707011588435e-08, "logits/chosen": -3.4638404846191406, "logits/rejected": -3.4181041717529297, "logps/chosen": -178.20632934570312, "logps/rejected": -222.76104736328125, "loss": 0.3095, "rewards/accuracies": 0.75, "rewards/chosen": -0.14339207112789154, "rewards/margins": 4.471451759338379, "rewards/rejected": -4.614843368530273, "step": 8184 }, { "epoch": 0.94, "learning_rate": 1.7171953646260094e-08, "logits/chosen": -2.735933303833008, "logits/rejected": -2.6350550651550293, "logps/chosen": -395.56610107421875, "logps/rejected": -303.74664306640625, "loss": 0.2445, "rewards/accuracies": 0.875, "rewards/chosen": 0.4905344247817993, "rewards/margins": 2.201305389404297, "rewards/rejected": -1.710770845413208, "step": 8185 }, { "epoch": 0.94, "learning_rate": 1.713683717663584e-08, "logits/chosen": -3.640927791595459, "logits/rejected": -3.537898063659668, "logps/chosen": -235.9541015625, "logps/rejected": -238.00704956054688, "loss": 0.1763, "rewards/accuracies": 1.0, "rewards/chosen": 0.3299994170665741, "rewards/margins": 2.1851584911346436, "rewards/rejected": -1.855159044265747, "step": 8186 }, { "epoch": 0.94, "learning_rate": 1.710172070701159e-08, "logits/chosen": -3.670579195022583, "logits/rejected": -3.31243896484375, "logps/chosen": -400.0976257324219, "logps/rejected": -166.83251953125, "loss": 0.2419, "rewards/accuracies": 1.0, "rewards/chosen": -0.15896055102348328, "rewards/margins": 1.7964537143707275, "rewards/rejected": -1.9554142951965332, "step": 8187 }, { "epoch": 0.94, "learning_rate": 1.7066604237387332e-08, "logits/chosen": -3.256181478500366, "logits/rejected": -3.586435317993164, "logps/chosen": -145.37689208984375, "logps/rejected": -237.75535583496094, "loss": 0.3289, "rewards/accuracies": 0.875, "rewards/chosen": 0.08352944254875183, "rewards/margins": 3.1627519130706787, "rewards/rejected": -3.0792226791381836, "step": 8188 }, { "epoch": 0.94, "learning_rate": 1.703148776776308e-08, "logits/chosen": -3.524756669998169, "logits/rejected": -3.858372688293457, "logps/chosen": -302.4464416503906, "logps/rejected": -386.4407653808594, "loss": 0.8982, "rewards/accuracies": 0.625, "rewards/chosen": -1.2480573654174805, "rewards/margins": 2.2425270080566406, "rewards/rejected": -3.490584373474121, "step": 8189 }, { "epoch": 0.94, "learning_rate": 1.6996371298138827e-08, "logits/chosen": -3.6286473274230957, "logits/rejected": -3.801705837249756, "logps/chosen": -443.7227783203125, "logps/rejected": -249.3665771484375, "loss": 0.7725, "rewards/accuracies": 0.875, "rewards/chosen": -0.3813709616661072, "rewards/margins": 1.7577624320983887, "rewards/rejected": -2.1391334533691406, "step": 8190 }, { "epoch": 0.94, "learning_rate": 1.696125482851457e-08, "logits/chosen": -3.1533098220825195, "logits/rejected": -3.043942451477051, "logps/chosen": -198.21463012695312, "logps/rejected": -251.66473388671875, "loss": 0.4345, "rewards/accuracies": 0.75, "rewards/chosen": 0.058684855699539185, "rewards/margins": 1.3391234874725342, "rewards/rejected": -1.2804384231567383, "step": 8191 }, { "epoch": 0.94, "learning_rate": 1.6926138358890318e-08, "logits/chosen": -3.1368894577026367, "logits/rejected": -3.2552740573883057, "logps/chosen": -87.27498626708984, "logps/rejected": -183.72869873046875, "loss": 0.7744, "rewards/accuracies": 0.375, "rewards/chosen": -0.4817124903202057, "rewards/margins": 0.7488498687744141, "rewards/rejected": -1.2305623292922974, "step": 8192 }, { "epoch": 0.94, "learning_rate": 1.6891021889266065e-08, "logits/chosen": -3.318396806716919, "logits/rejected": -3.300485849380493, "logps/chosen": -301.31329345703125, "logps/rejected": -219.75338745117188, "loss": 0.6437, "rewards/accuracies": 0.625, "rewards/chosen": -0.3271428346633911, "rewards/margins": 1.127153992652893, "rewards/rejected": -1.4542968273162842, "step": 8193 }, { "epoch": 0.94, "learning_rate": 1.6855905419641812e-08, "logits/chosen": -3.243210792541504, "logits/rejected": -3.2204103469848633, "logps/chosen": -200.63470458984375, "logps/rejected": -205.8739013671875, "loss": 0.202, "rewards/accuracies": 1.0, "rewards/chosen": 0.08668699860572815, "rewards/margins": 2.152761936187744, "rewards/rejected": -2.066074848175049, "step": 8194 }, { "epoch": 0.94, "learning_rate": 1.6820788950017556e-08, "logits/chosen": -3.393320322036743, "logits/rejected": -2.99713134765625, "logps/chosen": -163.70123291015625, "logps/rejected": -167.90652465820312, "loss": 0.4367, "rewards/accuracies": 0.875, "rewards/chosen": -0.3250158727169037, "rewards/margins": 1.6998326778411865, "rewards/rejected": -2.024848699569702, "step": 8195 }, { "epoch": 0.94, "learning_rate": 1.6785672480393304e-08, "logits/chosen": -3.2904388904571533, "logits/rejected": -2.957132339477539, "logps/chosen": -280.9899597167969, "logps/rejected": -197.3377227783203, "loss": 0.4804, "rewards/accuracies": 0.875, "rewards/chosen": -0.47176849842071533, "rewards/margins": 1.6066287755966187, "rewards/rejected": -2.078397274017334, "step": 8196 }, { "epoch": 0.94, "learning_rate": 1.675055601076905e-08, "logits/chosen": -2.3939108848571777, "logits/rejected": -2.9260315895080566, "logps/chosen": -188.6431884765625, "logps/rejected": -229.49127197265625, "loss": 0.1688, "rewards/accuracies": 1.0, "rewards/chosen": 0.5344288349151611, "rewards/margins": 2.6310200691223145, "rewards/rejected": -2.096590995788574, "step": 8197 }, { "epoch": 0.95, "learning_rate": 1.6715439541144795e-08, "logits/chosen": -2.4816136360168457, "logits/rejected": -2.743485689163208, "logps/chosen": -327.7810974121094, "logps/rejected": -314.2917785644531, "loss": 0.3389, "rewards/accuracies": 0.75, "rewards/chosen": 0.1691344678401947, "rewards/margins": 2.4031965732574463, "rewards/rejected": -2.2340621948242188, "step": 8198 }, { "epoch": 0.95, "learning_rate": 1.6680323071520542e-08, "logits/chosen": -2.8620526790618896, "logits/rejected": -3.2579879760742188, "logps/chosen": -330.3363342285156, "logps/rejected": -312.0775146484375, "loss": 0.2308, "rewards/accuracies": 0.875, "rewards/chosen": 0.17177648842334747, "rewards/margins": 3.5783941745758057, "rewards/rejected": -3.4066176414489746, "step": 8199 }, { "epoch": 0.95, "learning_rate": 1.664520660189629e-08, "logits/chosen": -2.812788724899292, "logits/rejected": -2.8027877807617188, "logps/chosen": -82.58345031738281, "logps/rejected": -134.635986328125, "loss": 0.4916, "rewards/accuracies": 0.75, "rewards/chosen": -0.4537752866744995, "rewards/margins": 0.7132229208946228, "rewards/rejected": -1.1669981479644775, "step": 8200 }, { "epoch": 0.95, "learning_rate": 1.6610090132272036e-08, "logits/chosen": -3.2601284980773926, "logits/rejected": -3.1930699348449707, "logps/chosen": -132.95388793945312, "logps/rejected": -147.77536010742188, "loss": 0.2757, "rewards/accuracies": 1.0, "rewards/chosen": 0.32066333293914795, "rewards/margins": 1.5845098495483398, "rewards/rejected": -1.2638466358184814, "step": 8201 }, { "epoch": 0.95, "learning_rate": 1.657497366264778e-08, "logits/chosen": -3.490626811981201, "logits/rejected": -3.728221893310547, "logps/chosen": -303.9148864746094, "logps/rejected": -290.1959228515625, "loss": 0.4674, "rewards/accuracies": 0.875, "rewards/chosen": -0.24737754464149475, "rewards/margins": 1.1534066200256348, "rewards/rejected": -1.4007842540740967, "step": 8202 }, { "epoch": 0.95, "learning_rate": 1.6539857193023528e-08, "logits/chosen": -3.2871265411376953, "logits/rejected": -3.294301748275757, "logps/chosen": -141.0595703125, "logps/rejected": -173.48016357421875, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.46653860807418823, "rewards/margins": 1.390503168106079, "rewards/rejected": -1.857041835784912, "step": 8203 }, { "epoch": 0.95, "learning_rate": 1.6504740723399275e-08, "logits/chosen": -2.704057216644287, "logits/rejected": -3.1224329471588135, "logps/chosen": -163.2122802734375, "logps/rejected": -219.3159637451172, "loss": 0.5343, "rewards/accuracies": 0.625, "rewards/chosen": -0.9955257177352905, "rewards/margins": 2.045006036758423, "rewards/rejected": -3.040531635284424, "step": 8204 }, { "epoch": 0.95, "learning_rate": 1.6469624253775022e-08, "logits/chosen": -3.3526034355163574, "logits/rejected": -3.325371742248535, "logps/chosen": -171.75274658203125, "logps/rejected": -278.3505554199219, "loss": 0.3932, "rewards/accuracies": 0.875, "rewards/chosen": -0.3134555518627167, "rewards/margins": 2.2649595737457275, "rewards/rejected": -2.5784149169921875, "step": 8205 }, { "epoch": 0.95, "learning_rate": 1.6434507784150766e-08, "logits/chosen": -3.753615379333496, "logits/rejected": -3.6616721153259277, "logps/chosen": -172.57952880859375, "logps/rejected": -182.27658081054688, "loss": 0.2312, "rewards/accuracies": 1.0, "rewards/chosen": -0.08016112446784973, "rewards/margins": 1.9451558589935303, "rewards/rejected": -2.0253169536590576, "step": 8206 }, { "epoch": 0.95, "learning_rate": 1.6399391314526513e-08, "logits/chosen": -3.644166946411133, "logits/rejected": -3.450709819793701, "logps/chosen": -201.61097717285156, "logps/rejected": -220.2666778564453, "loss": 0.2283, "rewards/accuracies": 0.875, "rewards/chosen": 0.05219858139753342, "rewards/margins": 2.7639944553375244, "rewards/rejected": -2.711796283721924, "step": 8207 }, { "epoch": 0.95, "learning_rate": 1.636427484490226e-08, "logits/chosen": -3.025402069091797, "logits/rejected": -3.0377516746520996, "logps/chosen": -264.8951721191406, "logps/rejected": -281.4089660644531, "loss": 0.3123, "rewards/accuracies": 0.875, "rewards/chosen": 0.18848800659179688, "rewards/margins": 1.495556354522705, "rewards/rejected": -1.3070685863494873, "step": 8208 }, { "epoch": 0.95, "learning_rate": 1.6329158375278004e-08, "logits/chosen": -3.087926149368286, "logits/rejected": -3.294440269470215, "logps/chosen": -166.95907592773438, "logps/rejected": -190.9630889892578, "loss": 0.1973, "rewards/accuracies": 0.875, "rewards/chosen": 0.06519429385662079, "rewards/margins": 2.155423641204834, "rewards/rejected": -2.0902292728424072, "step": 8209 }, { "epoch": 0.95, "learning_rate": 1.629404190565375e-08, "logits/chosen": -3.0046486854553223, "logits/rejected": -3.112730026245117, "logps/chosen": -159.9574432373047, "logps/rejected": -266.94891357421875, "loss": 0.1826, "rewards/accuracies": 0.875, "rewards/chosen": 0.14056234061717987, "rewards/margins": 2.934500217437744, "rewards/rejected": -2.7939376831054688, "step": 8210 }, { "epoch": 0.95, "learning_rate": 1.6258925436029496e-08, "logits/chosen": -3.131894588470459, "logits/rejected": -2.923311233520508, "logps/chosen": -296.8849182128906, "logps/rejected": -166.84710693359375, "loss": 0.5148, "rewards/accuracies": 0.75, "rewards/chosen": 0.41574665904045105, "rewards/margins": 0.9659462571144104, "rewards/rejected": -0.5501996278762817, "step": 8211 }, { "epoch": 0.95, "learning_rate": 1.6223808966405243e-08, "logits/chosen": -3.120729923248291, "logits/rejected": -3.2750959396362305, "logps/chosen": -105.42279052734375, "logps/rejected": -158.7264404296875, "loss": 0.5977, "rewards/accuracies": 0.5, "rewards/chosen": -0.41063058376312256, "rewards/margins": 1.134529709815979, "rewards/rejected": -1.5451604127883911, "step": 8212 }, { "epoch": 0.95, "learning_rate": 1.618869249678099e-08, "logits/chosen": -3.105663776397705, "logits/rejected": -3.2051539421081543, "logps/chosen": -244.02313232421875, "logps/rejected": -297.44482421875, "loss": 0.2166, "rewards/accuracies": 0.875, "rewards/chosen": 0.13897782564163208, "rewards/margins": 2.947629690170288, "rewards/rejected": -2.808651924133301, "step": 8213 }, { "epoch": 0.95, "learning_rate": 1.6153576027156734e-08, "logits/chosen": -2.5704214572906494, "logits/rejected": -3.129823923110962, "logps/chosen": -279.55010986328125, "logps/rejected": -291.8548583984375, "loss": 0.4326, "rewards/accuracies": 0.625, "rewards/chosen": -0.40502822399139404, "rewards/margins": 1.835604190826416, "rewards/rejected": -2.2406325340270996, "step": 8214 }, { "epoch": 0.95, "learning_rate": 1.611845955753248e-08, "logits/chosen": -2.762256145477295, "logits/rejected": -2.810194969177246, "logps/chosen": -173.51962280273438, "logps/rejected": -203.73468017578125, "loss": 0.5491, "rewards/accuracies": 0.875, "rewards/chosen": 0.021342262625694275, "rewards/margins": 1.500236988067627, "rewards/rejected": -1.478894829750061, "step": 8215 }, { "epoch": 0.95, "learning_rate": 1.608334308790823e-08, "logits/chosen": -2.7437829971313477, "logits/rejected": -2.751807689666748, "logps/chosen": -207.1459503173828, "logps/rejected": -152.3746337890625, "loss": 0.2796, "rewards/accuracies": 0.875, "rewards/chosen": 0.474897176027298, "rewards/margins": 2.1351046562194824, "rewards/rejected": -1.6602072715759277, "step": 8216 }, { "epoch": 0.95, "learning_rate": 1.6048226618283976e-08, "logits/chosen": -2.2935750484466553, "logits/rejected": -2.3654778003692627, "logps/chosen": -182.8057403564453, "logps/rejected": -225.74911499023438, "loss": 0.5102, "rewards/accuracies": 0.75, "rewards/chosen": 0.26263201236724854, "rewards/margins": 1.0653802156448364, "rewards/rejected": -0.8027482032775879, "step": 8217 }, { "epoch": 0.95, "learning_rate": 1.601311014865972e-08, "logits/chosen": -3.118767261505127, "logits/rejected": -3.4249792098999023, "logps/chosen": -172.46945190429688, "logps/rejected": -174.78443908691406, "loss": 0.323, "rewards/accuracies": 0.875, "rewards/chosen": -0.034323014318943024, "rewards/margins": 1.9493227005004883, "rewards/rejected": -1.9836456775665283, "step": 8218 }, { "epoch": 0.95, "learning_rate": 1.5977993679035467e-08, "logits/chosen": -2.883897304534912, "logits/rejected": -2.988283634185791, "logps/chosen": -129.30409240722656, "logps/rejected": -227.33428955078125, "loss": 0.5017, "rewards/accuracies": 0.75, "rewards/chosen": -0.5410102605819702, "rewards/margins": 0.7723912000656128, "rewards/rejected": -1.3134015798568726, "step": 8219 }, { "epoch": 0.95, "learning_rate": 1.5942877209411214e-08, "logits/chosen": -3.0430986881256104, "logits/rejected": -3.0488839149475098, "logps/chosen": -225.2998046875, "logps/rejected": -224.41421508789062, "loss": 0.6459, "rewards/accuracies": 0.625, "rewards/chosen": -0.2981608808040619, "rewards/margins": 0.6007200479507446, "rewards/rejected": -0.8988809585571289, "step": 8220 }, { "epoch": 0.95, "learning_rate": 1.5907760739786958e-08, "logits/chosen": -2.785719394683838, "logits/rejected": -2.9434454441070557, "logps/chosen": -252.40362548828125, "logps/rejected": -299.7442932128906, "loss": 0.2048, "rewards/accuracies": 1.0, "rewards/chosen": 0.3780643343925476, "rewards/margins": 3.18212890625, "rewards/rejected": -2.8040645122528076, "step": 8221 }, { "epoch": 0.95, "learning_rate": 1.5872644270162705e-08, "logits/chosen": -3.126917839050293, "logits/rejected": -3.0756540298461914, "logps/chosen": -236.4794464111328, "logps/rejected": -270.8515319824219, "loss": 0.3246, "rewards/accuracies": 0.75, "rewards/chosen": -0.08024759590625763, "rewards/margins": 1.9774391651153564, "rewards/rejected": -2.0576870441436768, "step": 8222 }, { "epoch": 0.95, "learning_rate": 1.5837527800538453e-08, "logits/chosen": -2.2370827198028564, "logits/rejected": -2.388514518737793, "logps/chosen": -236.5407257080078, "logps/rejected": -129.404296875, "loss": 0.2127, "rewards/accuracies": 1.0, "rewards/chosen": 0.21934986114501953, "rewards/margins": 2.1705026626586914, "rewards/rejected": -1.9511528015136719, "step": 8223 }, { "epoch": 0.95, "learning_rate": 1.58024113309142e-08, "logits/chosen": -2.834913730621338, "logits/rejected": -2.779510259628296, "logps/chosen": -371.0307922363281, "logps/rejected": -344.0577087402344, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": 0.30353468656539917, "rewards/margins": 2.19862699508667, "rewards/rejected": -1.895092248916626, "step": 8224 }, { "epoch": 0.95, "learning_rate": 1.5767294861289944e-08, "logits/chosen": -2.446337938308716, "logits/rejected": -2.3566110134124756, "logps/chosen": -201.25814819335938, "logps/rejected": -187.96548461914062, "loss": 0.3248, "rewards/accuracies": 0.875, "rewards/chosen": -0.12175148725509644, "rewards/margins": 1.5445765256881714, "rewards/rejected": -1.6663278341293335, "step": 8225 }, { "epoch": 0.95, "learning_rate": 1.573217839166569e-08, "logits/chosen": -3.3017666339874268, "logits/rejected": -3.2026782035827637, "logps/chosen": -283.6519775390625, "logps/rejected": -380.90179443359375, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": 0.2574186325073242, "rewards/margins": 3.3695621490478516, "rewards/rejected": -3.1121435165405273, "step": 8226 }, { "epoch": 0.95, "learning_rate": 1.5697061922041438e-08, "logits/chosen": -2.8463644981384277, "logits/rejected": -2.6944236755371094, "logps/chosen": -507.3891906738281, "logps/rejected": -300.4443054199219, "loss": 0.5968, "rewards/accuracies": 0.75, "rewards/chosen": -0.3947167694568634, "rewards/margins": 1.0157959461212158, "rewards/rejected": -1.4105126857757568, "step": 8227 }, { "epoch": 0.95, "learning_rate": 1.5661945452417182e-08, "logits/chosen": -2.275399684906006, "logits/rejected": -2.474114418029785, "logps/chosen": -360.2444763183594, "logps/rejected": -243.35043334960938, "loss": 0.6993, "rewards/accuracies": 0.625, "rewards/chosen": -0.5487843751907349, "rewards/margins": 0.24798502027988434, "rewards/rejected": -0.796769380569458, "step": 8228 }, { "epoch": 0.95, "learning_rate": 1.562682898279293e-08, "logits/chosen": -3.0128846168518066, "logits/rejected": -2.742933511734009, "logps/chosen": -220.90921020507812, "logps/rejected": -184.084228515625, "loss": 0.2275, "rewards/accuracies": 0.875, "rewards/chosen": -0.2848397493362427, "rewards/margins": 2.683603048324585, "rewards/rejected": -2.968442678451538, "step": 8229 }, { "epoch": 0.95, "learning_rate": 1.5591712513168677e-08, "logits/chosen": -3.1469240188598633, "logits/rejected": -3.0178396701812744, "logps/chosen": -303.40191650390625, "logps/rejected": -184.71502685546875, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": 0.10997083783149719, "rewards/margins": 2.4358434677124023, "rewards/rejected": -2.3258726596832275, "step": 8230 }, { "epoch": 0.95, "learning_rate": 1.5556596043544424e-08, "logits/chosen": -2.72654390335083, "logits/rejected": -2.601412534713745, "logps/chosen": -278.7293701171875, "logps/rejected": -315.77655029296875, "loss": 0.5713, "rewards/accuracies": 0.625, "rewards/chosen": -0.03932797908782959, "rewards/margins": 1.9768569469451904, "rewards/rejected": -2.0161850452423096, "step": 8231 }, { "epoch": 0.95, "learning_rate": 1.5521479573920168e-08, "logits/chosen": -3.7213213443756104, "logits/rejected": -3.971909999847412, "logps/chosen": -178.84634399414062, "logps/rejected": -198.11172485351562, "loss": 0.5061, "rewards/accuracies": 0.75, "rewards/chosen": -0.5730245113372803, "rewards/margins": 1.2351694107055664, "rewards/rejected": -1.8081941604614258, "step": 8232 }, { "epoch": 0.95, "learning_rate": 1.5486363104295912e-08, "logits/chosen": -3.7951862812042236, "logits/rejected": -3.7009530067443848, "logps/chosen": -242.45762634277344, "logps/rejected": -275.89495849609375, "loss": 0.4082, "rewards/accuracies": 0.75, "rewards/chosen": -0.259093701839447, "rewards/margins": 1.7944221496582031, "rewards/rejected": -2.053515672683716, "step": 8233 }, { "epoch": 0.95, "learning_rate": 1.545124663467166e-08, "logits/chosen": -2.8726401329040527, "logits/rejected": -2.84236216545105, "logps/chosen": -296.1202392578125, "logps/rejected": -271.3155822753906, "loss": 0.574, "rewards/accuracies": 0.625, "rewards/chosen": -0.26624467968940735, "rewards/margins": 2.0868430137634277, "rewards/rejected": -2.3530876636505127, "step": 8234 }, { "epoch": 0.95, "learning_rate": 1.5416130165047406e-08, "logits/chosen": -2.4935855865478516, "logits/rejected": -3.1072025299072266, "logps/chosen": -304.6036376953125, "logps/rejected": -353.6661376953125, "loss": 0.3499, "rewards/accuracies": 0.875, "rewards/chosen": -0.034298986196517944, "rewards/margins": 2.2546679973602295, "rewards/rejected": -2.2889671325683594, "step": 8235 }, { "epoch": 0.95, "learning_rate": 1.5381013695423153e-08, "logits/chosen": -2.435995578765869, "logits/rejected": -2.6241252422332764, "logps/chosen": -205.0628204345703, "logps/rejected": -156.2589111328125, "loss": 0.2906, "rewards/accuracies": 0.875, "rewards/chosen": -0.029611259698867798, "rewards/margins": 2.0836033821105957, "rewards/rejected": -2.1132144927978516, "step": 8236 }, { "epoch": 0.95, "learning_rate": 1.5345897225798897e-08, "logits/chosen": -2.708845376968384, "logits/rejected": -2.888165235519409, "logps/chosen": -348.1418151855469, "logps/rejected": -158.44781494140625, "loss": 1.1795, "rewards/accuracies": 0.75, "rewards/chosen": -1.1869009733200073, "rewards/margins": 0.34230929613113403, "rewards/rejected": -1.5292103290557861, "step": 8237 }, { "epoch": 0.95, "learning_rate": 1.5310780756174645e-08, "logits/chosen": -3.2474470138549805, "logits/rejected": -3.3787484169006348, "logps/chosen": -142.2418212890625, "logps/rejected": -164.19589233398438, "loss": 0.4073, "rewards/accuracies": 0.625, "rewards/chosen": 0.28977325558662415, "rewards/margins": 1.8503177165985107, "rewards/rejected": -1.5605443716049194, "step": 8238 }, { "epoch": 0.95, "learning_rate": 1.5275664286550392e-08, "logits/chosen": -3.128875970840454, "logits/rejected": -3.309847354888916, "logps/chosen": -258.1601867675781, "logps/rejected": -110.73875427246094, "loss": 0.4387, "rewards/accuracies": 0.75, "rewards/chosen": -0.3814738988876343, "rewards/margins": 0.7139061689376831, "rewards/rejected": -1.0953800678253174, "step": 8239 }, { "epoch": 0.95, "learning_rate": 1.5240547816926136e-08, "logits/chosen": -2.668130397796631, "logits/rejected": -2.4263410568237305, "logps/chosen": -267.9188537597656, "logps/rejected": -339.1885986328125, "loss": 0.4191, "rewards/accuracies": 0.75, "rewards/chosen": -0.01929982751607895, "rewards/margins": 1.0208733081817627, "rewards/rejected": -1.0401731729507446, "step": 8240 }, { "epoch": 0.95, "learning_rate": 1.5205431347301883e-08, "logits/chosen": -2.8403642177581787, "logits/rejected": -2.65486741065979, "logps/chosen": -219.2029266357422, "logps/rejected": -122.58714294433594, "loss": 0.5096, "rewards/accuracies": 0.625, "rewards/chosen": 0.18359404802322388, "rewards/margins": 1.0307050943374634, "rewards/rejected": -0.8471111059188843, "step": 8241 }, { "epoch": 0.95, "learning_rate": 1.517031487767763e-08, "logits/chosen": -2.969252109527588, "logits/rejected": -2.7769699096679688, "logps/chosen": -253.57852172851562, "logps/rejected": -266.30499267578125, "loss": 0.2753, "rewards/accuracies": 0.875, "rewards/chosen": -0.17603209614753723, "rewards/margins": 2.0415267944335938, "rewards/rejected": -2.2175586223602295, "step": 8242 }, { "epoch": 0.95, "learning_rate": 1.5135198408053377e-08, "logits/chosen": -3.2662341594696045, "logits/rejected": -3.611793041229248, "logps/chosen": -186.13198852539062, "logps/rejected": -200.3729248046875, "loss": 0.3665, "rewards/accuracies": 0.875, "rewards/chosen": -0.11456766724586487, "rewards/margins": 1.9970074892044067, "rewards/rejected": -2.111575126647949, "step": 8243 }, { "epoch": 0.95, "learning_rate": 1.510008193842912e-08, "logits/chosen": -2.9553093910217285, "logits/rejected": -2.90592360496521, "logps/chosen": -293.00225830078125, "logps/rejected": -285.9473876953125, "loss": 0.5508, "rewards/accuracies": 0.75, "rewards/chosen": -0.6524166464805603, "rewards/margins": 1.545724630355835, "rewards/rejected": -2.198141574859619, "step": 8244 }, { "epoch": 0.95, "learning_rate": 1.506496546880487e-08, "logits/chosen": -2.9966237545013428, "logits/rejected": -2.892843008041382, "logps/chosen": -197.93243408203125, "logps/rejected": -277.37359619140625, "loss": 0.3205, "rewards/accuracies": 0.875, "rewards/chosen": 0.1410657912492752, "rewards/margins": 3.0153555870056152, "rewards/rejected": -2.8742895126342773, "step": 8245 }, { "epoch": 0.95, "learning_rate": 1.5029848999180616e-08, "logits/chosen": -3.777984380722046, "logits/rejected": -3.8629801273345947, "logps/chosen": -241.86729431152344, "logps/rejected": -167.6749267578125, "loss": 0.2298, "rewards/accuracies": 1.0, "rewards/chosen": 0.862123966217041, "rewards/margins": 2.0270814895629883, "rewards/rejected": -1.1649575233459473, "step": 8246 }, { "epoch": 0.95, "learning_rate": 1.4994732529556363e-08, "logits/chosen": -2.833996534347534, "logits/rejected": -2.5663673877716064, "logps/chosen": -145.63504028320312, "logps/rejected": -269.19964599609375, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": 0.09467703104019165, "rewards/margins": 3.088162422180176, "rewards/rejected": -2.993485450744629, "step": 8247 }, { "epoch": 0.95, "learning_rate": 1.4959616059932107e-08, "logits/chosen": -3.387742042541504, "logits/rejected": -3.565957546234131, "logps/chosen": -184.724365234375, "logps/rejected": -293.4902648925781, "loss": 0.2867, "rewards/accuracies": 0.875, "rewards/chosen": 0.07957261055707932, "rewards/margins": 2.0599169731140137, "rewards/rejected": -1.9803444147109985, "step": 8248 }, { "epoch": 0.95, "learning_rate": 1.4924499590307854e-08, "logits/chosen": -3.854768991470337, "logits/rejected": -3.729292392730713, "logps/chosen": -189.33538818359375, "logps/rejected": -227.29953002929688, "loss": 0.6, "rewards/accuracies": 0.75, "rewards/chosen": -0.4908967614173889, "rewards/margins": 1.4630119800567627, "rewards/rejected": -1.9539086818695068, "step": 8249 }, { "epoch": 0.95, "learning_rate": 1.48893831206836e-08, "logits/chosen": -3.515562057495117, "logits/rejected": -2.7844552993774414, "logps/chosen": -224.0501708984375, "logps/rejected": -258.92803955078125, "loss": 0.4064, "rewards/accuracies": 0.75, "rewards/chosen": -0.6445354223251343, "rewards/margins": 1.1457926034927368, "rewards/rejected": -1.790328025817871, "step": 8250 }, { "epoch": 0.95, "learning_rate": 1.4854266651059347e-08, "logits/chosen": -3.609443426132202, "logits/rejected": -3.56705904006958, "logps/chosen": -140.5050048828125, "logps/rejected": -265.04144287109375, "loss": 0.185, "rewards/accuracies": 0.875, "rewards/chosen": 0.40054553747177124, "rewards/margins": 3.872006416320801, "rewards/rejected": -3.4714608192443848, "step": 8251 }, { "epoch": 0.95, "learning_rate": 1.4819150181435093e-08, "logits/chosen": -3.971944570541382, "logits/rejected": -3.5025181770324707, "logps/chosen": -244.74334716796875, "logps/rejected": -254.66751098632812, "loss": 0.5662, "rewards/accuracies": 0.875, "rewards/chosen": -0.4298558533191681, "rewards/margins": 1.6099966764450073, "rewards/rejected": -2.0398526191711426, "step": 8252 }, { "epoch": 0.95, "learning_rate": 1.478403371181084e-08, "logits/chosen": -3.149454355239868, "logits/rejected": -2.756873607635498, "logps/chosen": -365.79693603515625, "logps/rejected": -260.39788818359375, "loss": 0.2832, "rewards/accuracies": 0.875, "rewards/chosen": 0.4141916334629059, "rewards/margins": 1.5371320247650146, "rewards/rejected": -1.1229404211044312, "step": 8253 }, { "epoch": 0.95, "learning_rate": 1.4748917242186585e-08, "logits/chosen": -2.5218136310577393, "logits/rejected": -2.6424124240875244, "logps/chosen": -379.86309814453125, "logps/rejected": -305.38531494140625, "loss": 0.5845, "rewards/accuracies": 0.875, "rewards/chosen": -0.5088021755218506, "rewards/margins": 1.1483252048492432, "rewards/rejected": -1.6571276187896729, "step": 8254 }, { "epoch": 0.95, "learning_rate": 1.4713800772562333e-08, "logits/chosen": -3.7103357315063477, "logits/rejected": -3.9747860431671143, "logps/chosen": -156.41409301757812, "logps/rejected": -281.7643737792969, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": 0.007502416148781776, "rewards/margins": 1.765620231628418, "rewards/rejected": -1.7581177949905396, "step": 8255 }, { "epoch": 0.95, "learning_rate": 1.4678684302938078e-08, "logits/chosen": -3.572471857070923, "logits/rejected": -3.675083637237549, "logps/chosen": -308.5960388183594, "logps/rejected": -331.8685302734375, "loss": 0.3326, "rewards/accuracies": 0.75, "rewards/chosen": 0.03995952010154724, "rewards/margins": 2.750368595123291, "rewards/rejected": -2.710409164428711, "step": 8256 }, { "epoch": 0.95, "learning_rate": 1.4643567833313822e-08, "logits/chosen": -3.8916492462158203, "logits/rejected": -3.590463638305664, "logps/chosen": -421.23779296875, "logps/rejected": -418.8205871582031, "loss": 0.2585, "rewards/accuracies": 0.875, "rewards/chosen": -0.07219311594963074, "rewards/margins": 3.73925518989563, "rewards/rejected": -3.811448335647583, "step": 8257 }, { "epoch": 0.95, "learning_rate": 1.460845136368957e-08, "logits/chosen": -3.0521113872528076, "logits/rejected": -2.9243834018707275, "logps/chosen": -192.96823120117188, "logps/rejected": -208.53640747070312, "loss": 0.3646, "rewards/accuracies": 0.875, "rewards/chosen": -0.40024542808532715, "rewards/margins": 1.7781167030334473, "rewards/rejected": -2.1783618927001953, "step": 8258 }, { "epoch": 0.95, "learning_rate": 1.4573334894065315e-08, "logits/chosen": -3.2460811138153076, "logits/rejected": -3.2191967964172363, "logps/chosen": -209.1927490234375, "logps/rejected": -147.4943389892578, "loss": 0.8816, "rewards/accuracies": 0.5, "rewards/chosen": -0.7279817461967468, "rewards/margins": 0.7797703742980957, "rewards/rejected": -1.5077519416809082, "step": 8259 }, { "epoch": 0.95, "learning_rate": 1.4538218424441062e-08, "logits/chosen": -2.7057747840881348, "logits/rejected": -2.4894022941589355, "logps/chosen": -275.1524963378906, "logps/rejected": -209.80001831054688, "loss": 0.3148, "rewards/accuracies": 1.0, "rewards/chosen": -0.08709488809108734, "rewards/margins": 1.449735164642334, "rewards/rejected": -1.536830186843872, "step": 8260 }, { "epoch": 0.95, "learning_rate": 1.4503101954816808e-08, "logits/chosen": -2.748729705810547, "logits/rejected": -2.8736796379089355, "logps/chosen": -210.43716430664062, "logps/rejected": -296.22357177734375, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": 0.5601373314857483, "rewards/margins": 3.757070302963257, "rewards/rejected": -3.1969330310821533, "step": 8261 }, { "epoch": 0.95, "learning_rate": 1.4467985485192553e-08, "logits/chosen": -3.3517367839813232, "logits/rejected": -3.4727675914764404, "logps/chosen": -305.001953125, "logps/rejected": -287.24066162109375, "loss": 0.4119, "rewards/accuracies": 0.75, "rewards/chosen": -0.3569161295890808, "rewards/margins": 1.2706716060638428, "rewards/rejected": -1.6275877952575684, "step": 8262 }, { "epoch": 0.95, "learning_rate": 1.44328690155683e-08, "logits/chosen": -2.910613536834717, "logits/rejected": -3.02905011177063, "logps/chosen": -169.02452087402344, "logps/rejected": -266.8596496582031, "loss": 0.3417, "rewards/accuracies": 0.875, "rewards/chosen": -0.008430056273937225, "rewards/margins": 1.4330143928527832, "rewards/rejected": -1.4414445161819458, "step": 8263 }, { "epoch": 0.95, "learning_rate": 1.4397752545944046e-08, "logits/chosen": -3.5216925144195557, "logits/rejected": -3.5592074394226074, "logps/chosen": -128.70835876464844, "logps/rejected": -168.03363037109375, "loss": 0.8629, "rewards/accuracies": 0.5, "rewards/chosen": 0.1049141138792038, "rewards/margins": 0.3693433403968811, "rewards/rejected": -0.2644291818141937, "step": 8264 }, { "epoch": 0.95, "learning_rate": 1.4362636076319793e-08, "logits/chosen": -3.4989237785339355, "logits/rejected": -3.4006950855255127, "logps/chosen": -333.2518005371094, "logps/rejected": -228.68106079101562, "loss": 0.3657, "rewards/accuracies": 0.875, "rewards/chosen": 0.6153453588485718, "rewards/margins": 2.057711601257324, "rewards/rejected": -1.442366123199463, "step": 8265 }, { "epoch": 0.95, "learning_rate": 1.4327519606695539e-08, "logits/chosen": -3.5452423095703125, "logits/rejected": -3.2625255584716797, "logps/chosen": -268.45330810546875, "logps/rejected": -294.48663330078125, "loss": 0.2059, "rewards/accuracies": 1.0, "rewards/chosen": 0.4794980585575104, "rewards/margins": 3.1246089935302734, "rewards/rejected": -2.645111083984375, "step": 8266 }, { "epoch": 0.95, "learning_rate": 1.4292403137071286e-08, "logits/chosen": -3.2553274631500244, "logits/rejected": -3.2313218116760254, "logps/chosen": -315.8193054199219, "logps/rejected": -218.30810546875, "loss": 0.3406, "rewards/accuracies": 0.75, "rewards/chosen": 0.2441224902868271, "rewards/margins": 2.1272685527801514, "rewards/rejected": -1.8831461668014526, "step": 8267 }, { "epoch": 0.95, "learning_rate": 1.4257286667447032e-08, "logits/chosen": -3.1642954349517822, "logits/rejected": -3.3857221603393555, "logps/chosen": -260.1934814453125, "logps/rejected": -357.06890869140625, "loss": 0.412, "rewards/accuracies": 0.875, "rewards/chosen": -0.41890451312065125, "rewards/margins": 2.520261287689209, "rewards/rejected": -2.9391658306121826, "step": 8268 }, { "epoch": 0.95, "learning_rate": 1.4222170197822779e-08, "logits/chosen": -2.790724277496338, "logits/rejected": -3.0167055130004883, "logps/chosen": -284.969482421875, "logps/rejected": -282.53106689453125, "loss": 0.2764, "rewards/accuracies": 0.75, "rewards/chosen": 0.5384602546691895, "rewards/margins": 2.9212517738342285, "rewards/rejected": -2.382791519165039, "step": 8269 }, { "epoch": 0.95, "learning_rate": 1.4187053728198525e-08, "logits/chosen": -3.781431198120117, "logits/rejected": -4.054003715515137, "logps/chosen": -115.28715515136719, "logps/rejected": -165.31942749023438, "loss": 0.253, "rewards/accuracies": 1.0, "rewards/chosen": 0.6309633851051331, "rewards/margins": 2.374691963195801, "rewards/rejected": -1.743728518486023, "step": 8270 }, { "epoch": 0.95, "learning_rate": 1.415193725857427e-08, "logits/chosen": -3.2740638256073, "logits/rejected": -3.4012904167175293, "logps/chosen": -206.34042358398438, "logps/rejected": -292.7893371582031, "loss": 0.446, "rewards/accuracies": 0.875, "rewards/chosen": -0.6463313698768616, "rewards/margins": 1.5236260890960693, "rewards/rejected": -2.1699576377868652, "step": 8271 }, { "epoch": 0.95, "learning_rate": 1.4116820788950018e-08, "logits/chosen": -3.063417434692383, "logits/rejected": -3.140129327774048, "logps/chosen": -143.380615234375, "logps/rejected": -207.98114013671875, "loss": 0.3841, "rewards/accuracies": 0.875, "rewards/chosen": -0.38790908455848694, "rewards/margins": 1.7792935371398926, "rewards/rejected": -2.1672024726867676, "step": 8272 }, { "epoch": 0.95, "learning_rate": 1.4081704319325763e-08, "logits/chosen": -2.428615093231201, "logits/rejected": -2.602424144744873, "logps/chosen": -485.16888427734375, "logps/rejected": -377.37774658203125, "loss": 0.3829, "rewards/accuracies": 0.875, "rewards/chosen": 0.3134302496910095, "rewards/margins": 2.264782190322876, "rewards/rejected": -1.9513520002365112, "step": 8273 }, { "epoch": 0.95, "learning_rate": 1.404658784970151e-08, "logits/chosen": -2.636791467666626, "logits/rejected": -2.4307098388671875, "logps/chosen": -153.61888122558594, "logps/rejected": -244.98312377929688, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": 0.005350708961486816, "rewards/margins": 0.7849456667900085, "rewards/rejected": -0.7795950174331665, "step": 8274 }, { "epoch": 0.95, "learning_rate": 1.4011471380077256e-08, "logits/chosen": -2.2900447845458984, "logits/rejected": -2.266974925994873, "logps/chosen": -125.2791519165039, "logps/rejected": -223.76040649414062, "loss": 0.4536, "rewards/accuracies": 0.625, "rewards/chosen": -0.07715275883674622, "rewards/margins": 1.082786202430725, "rewards/rejected": -1.159938931465149, "step": 8275 }, { "epoch": 0.95, "learning_rate": 1.3976354910453003e-08, "logits/chosen": -3.1984896659851074, "logits/rejected": -3.1118476390838623, "logps/chosen": -140.6183624267578, "logps/rejected": -118.84224700927734, "loss": 0.3359, "rewards/accuracies": 0.875, "rewards/chosen": 0.54518723487854, "rewards/margins": 1.178221583366394, "rewards/rejected": -0.633034348487854, "step": 8276 }, { "epoch": 0.95, "learning_rate": 1.3941238440828749e-08, "logits/chosen": -3.314821481704712, "logits/rejected": -2.9306371212005615, "logps/chosen": -212.38771057128906, "logps/rejected": -163.811767578125, "loss": 0.7723, "rewards/accuracies": 0.625, "rewards/chosen": -0.583911120891571, "rewards/margins": 0.9184215068817139, "rewards/rejected": -1.5023326873779297, "step": 8277 }, { "epoch": 0.95, "learning_rate": 1.3906121971204494e-08, "logits/chosen": -2.8416147232055664, "logits/rejected": -2.804090976715088, "logps/chosen": -290.3372802734375, "logps/rejected": -122.77336120605469, "loss": 0.783, "rewards/accuracies": 0.5, "rewards/chosen": -0.23467686772346497, "rewards/margins": 0.8077397346496582, "rewards/rejected": -1.0424166917800903, "step": 8278 }, { "epoch": 0.95, "learning_rate": 1.3871005501580242e-08, "logits/chosen": -2.028742551803589, "logits/rejected": -1.8264014720916748, "logps/chosen": -275.3419494628906, "logps/rejected": -289.8919982910156, "loss": 0.2094, "rewards/accuracies": 1.0, "rewards/chosen": -0.23118162155151367, "rewards/margins": 2.138932466506958, "rewards/rejected": -2.3701140880584717, "step": 8279 }, { "epoch": 0.95, "learning_rate": 1.3835889031955987e-08, "logits/chosen": -2.86384916305542, "logits/rejected": -2.870595932006836, "logps/chosen": -239.38780212402344, "logps/rejected": -259.69989013671875, "loss": 0.4389, "rewards/accuracies": 0.875, "rewards/chosen": -0.2971067428588867, "rewards/margins": 1.757723331451416, "rewards/rejected": -2.0548300743103027, "step": 8280 }, { "epoch": 0.95, "learning_rate": 1.3800772562331733e-08, "logits/chosen": -3.0230178833007812, "logits/rejected": -2.7238361835479736, "logps/chosen": -347.1412048339844, "logps/rejected": -359.8379821777344, "loss": 0.5643, "rewards/accuracies": 0.75, "rewards/chosen": 0.1111907958984375, "rewards/margins": 0.7488512396812439, "rewards/rejected": -0.6376605033874512, "step": 8281 }, { "epoch": 0.95, "learning_rate": 1.3765656092707478e-08, "logits/chosen": -2.8979930877685547, "logits/rejected": -2.738325834274292, "logps/chosen": -185.16453552246094, "logps/rejected": -247.8653564453125, "loss": 0.4805, "rewards/accuracies": 0.75, "rewards/chosen": -0.1412678360939026, "rewards/margins": 2.1107494831085205, "rewards/rejected": -2.2520174980163574, "step": 8282 }, { "epoch": 0.95, "learning_rate": 1.3730539623083224e-08, "logits/chosen": -3.6049342155456543, "logits/rejected": -3.4563145637512207, "logps/chosen": -228.02239990234375, "logps/rejected": -198.47314453125, "loss": 0.3996, "rewards/accuracies": 0.75, "rewards/chosen": -0.237548828125, "rewards/margins": 1.7041387557983398, "rewards/rejected": -1.9416875839233398, "step": 8283 }, { "epoch": 0.95, "learning_rate": 1.3695423153458971e-08, "logits/chosen": -3.3590359687805176, "logits/rejected": -3.6160755157470703, "logps/chosen": -180.7003936767578, "logps/rejected": -231.48350524902344, "loss": 0.3915, "rewards/accuracies": 0.625, "rewards/chosen": 0.018077760934829712, "rewards/margins": 2.9970061779022217, "rewards/rejected": -2.978928565979004, "step": 8284 }, { "epoch": 0.96, "learning_rate": 1.3660306683834717e-08, "logits/chosen": -3.661324977874756, "logits/rejected": -3.793510675430298, "logps/chosen": -207.0034942626953, "logps/rejected": -245.124755859375, "loss": 0.4738, "rewards/accuracies": 0.75, "rewards/chosen": -0.8031078577041626, "rewards/margins": 1.8221588134765625, "rewards/rejected": -2.6252665519714355, "step": 8285 }, { "epoch": 0.96, "learning_rate": 1.3625190214210464e-08, "logits/chosen": -3.059300422668457, "logits/rejected": -3.1600863933563232, "logps/chosen": -291.9253845214844, "logps/rejected": -336.7073669433594, "loss": 0.2281, "rewards/accuracies": 1.0, "rewards/chosen": 0.42295241355895996, "rewards/margins": 2.7965152263641357, "rewards/rejected": -2.373562812805176, "step": 8286 }, { "epoch": 0.96, "learning_rate": 1.359007374458621e-08, "logits/chosen": -3.0627851486206055, "logits/rejected": -3.184109687805176, "logps/chosen": -239.51913452148438, "logps/rejected": -265.8860168457031, "loss": 0.4993, "rewards/accuracies": 0.75, "rewards/chosen": 0.5561214685440063, "rewards/margins": 2.0229475498199463, "rewards/rejected": -1.466826319694519, "step": 8287 }, { "epoch": 0.96, "learning_rate": 1.3554957274961957e-08, "logits/chosen": -3.5015735626220703, "logits/rejected": -3.482973098754883, "logps/chosen": -356.20782470703125, "logps/rejected": -387.2317810058594, "loss": 0.2822, "rewards/accuracies": 0.75, "rewards/chosen": -0.10837945342063904, "rewards/margins": 2.7391717433929443, "rewards/rejected": -2.847551107406616, "step": 8288 }, { "epoch": 0.96, "learning_rate": 1.3519840805337702e-08, "logits/chosen": -3.9006190299987793, "logits/rejected": -3.545283079147339, "logps/chosen": -137.8027801513672, "logps/rejected": -112.09077453613281, "loss": 0.3349, "rewards/accuracies": 0.75, "rewards/chosen": -0.10106566548347473, "rewards/margins": 1.4865074157714844, "rewards/rejected": -1.5875732898712158, "step": 8289 }, { "epoch": 0.96, "learning_rate": 1.348472433571345e-08, "logits/chosen": -3.612732410430908, "logits/rejected": -3.5788369178771973, "logps/chosen": -409.31854248046875, "logps/rejected": -362.3607177734375, "loss": 0.3418, "rewards/accuracies": 0.75, "rewards/chosen": 0.4367539882659912, "rewards/margins": 2.784336805343628, "rewards/rejected": -2.3475828170776367, "step": 8290 }, { "epoch": 0.96, "learning_rate": 1.3449607866089195e-08, "logits/chosen": -3.499504566192627, "logits/rejected": -3.363041877746582, "logps/chosen": -197.84814453125, "logps/rejected": -271.4588623046875, "loss": 0.3097, "rewards/accuracies": 1.0, "rewards/chosen": -0.20790208876132965, "rewards/margins": 2.007131338119507, "rewards/rejected": -2.215033531188965, "step": 8291 }, { "epoch": 0.96, "learning_rate": 1.341449139646494e-08, "logits/chosen": -3.470132827758789, "logits/rejected": -3.461972236633301, "logps/chosen": -272.3686828613281, "logps/rejected": -290.0356140136719, "loss": 0.2732, "rewards/accuracies": 0.875, "rewards/chosen": 0.13070176541805267, "rewards/margins": 1.9671165943145752, "rewards/rejected": -1.8364146947860718, "step": 8292 }, { "epoch": 0.96, "learning_rate": 1.3379374926840688e-08, "logits/chosen": -3.9186582565307617, "logits/rejected": -3.721738576889038, "logps/chosen": -261.04840087890625, "logps/rejected": -218.58486938476562, "loss": 0.5936, "rewards/accuracies": 0.75, "rewards/chosen": -0.3003842532634735, "rewards/margins": 1.6091973781585693, "rewards/rejected": -1.9095815420150757, "step": 8293 }, { "epoch": 0.96, "learning_rate": 1.3344258457216434e-08, "logits/chosen": -3.291627883911133, "logits/rejected": -3.216060161590576, "logps/chosen": -293.1528015136719, "logps/rejected": -197.66122436523438, "loss": 0.7025, "rewards/accuracies": 0.75, "rewards/chosen": -0.1359265297651291, "rewards/margins": 1.0045456886291504, "rewards/rejected": -1.1404720544815063, "step": 8294 }, { "epoch": 0.96, "learning_rate": 1.330914198759218e-08, "logits/chosen": -2.7195615768432617, "logits/rejected": -2.7487967014312744, "logps/chosen": -565.5466918945312, "logps/rejected": -322.7362976074219, "loss": 0.4274, "rewards/accuracies": 0.875, "rewards/chosen": -0.09694815427064896, "rewards/margins": 0.8577335476875305, "rewards/rejected": -0.9546816349029541, "step": 8295 }, { "epoch": 0.96, "learning_rate": 1.3274025517967926e-08, "logits/chosen": -3.1486330032348633, "logits/rejected": -2.9082202911376953, "logps/chosen": -167.2576904296875, "logps/rejected": -240.68621826171875, "loss": 0.8943, "rewards/accuracies": 0.75, "rewards/chosen": -0.8826574087142944, "rewards/margins": 0.7086774110794067, "rewards/rejected": -1.5913348197937012, "step": 8296 }, { "epoch": 0.96, "learning_rate": 1.3238909048343674e-08, "logits/chosen": -3.296518325805664, "logits/rejected": -3.0827856063842773, "logps/chosen": -263.598876953125, "logps/rejected": -175.9322052001953, "loss": 0.2601, "rewards/accuracies": 1.0, "rewards/chosen": 0.4610040783882141, "rewards/margins": 1.9768823385238647, "rewards/rejected": -1.5158780813217163, "step": 8297 }, { "epoch": 0.96, "learning_rate": 1.320379257871942e-08, "logits/chosen": -2.710878849029541, "logits/rejected": -2.6290817260742188, "logps/chosen": -426.05328369140625, "logps/rejected": -350.56524658203125, "loss": 0.5538, "rewards/accuracies": 0.75, "rewards/chosen": 0.14546924829483032, "rewards/margins": 1.0578967332839966, "rewards/rejected": -0.9124274849891663, "step": 8298 }, { "epoch": 0.96, "learning_rate": 1.3168676109095165e-08, "logits/chosen": -2.803089141845703, "logits/rejected": -3.2952938079833984, "logps/chosen": -103.19120788574219, "logps/rejected": -247.6085205078125, "loss": 0.277, "rewards/accuracies": 0.875, "rewards/chosen": 0.28170379996299744, "rewards/margins": 2.675689697265625, "rewards/rejected": -2.3939859867095947, "step": 8299 }, { "epoch": 0.96, "learning_rate": 1.3133559639470912e-08, "logits/chosen": -3.584475040435791, "logits/rejected": -3.5499305725097656, "logps/chosen": -400.31414794921875, "logps/rejected": -341.586669921875, "loss": 0.2138, "rewards/accuracies": 1.0, "rewards/chosen": -0.13942933082580566, "rewards/margins": 2.5025081634521484, "rewards/rejected": -2.641937494277954, "step": 8300 }, { "epoch": 0.96, "learning_rate": 1.3098443169846658e-08, "logits/chosen": -2.8413326740264893, "logits/rejected": -2.682149887084961, "logps/chosen": -216.79803466796875, "logps/rejected": -309.5220642089844, "loss": 0.2351, "rewards/accuracies": 0.875, "rewards/chosen": -0.0944717526435852, "rewards/margins": 2.5069546699523926, "rewards/rejected": -2.601426601409912, "step": 8301 }, { "epoch": 0.96, "learning_rate": 1.3063326700222405e-08, "logits/chosen": -3.6985836029052734, "logits/rejected": -3.6557083129882812, "logps/chosen": -69.42398834228516, "logps/rejected": -100.91555786132812, "loss": 0.4295, "rewards/accuracies": 0.75, "rewards/chosen": 0.07189862430095673, "rewards/margins": 0.9814072847366333, "rewards/rejected": -0.9095085859298706, "step": 8302 }, { "epoch": 0.96, "learning_rate": 1.302821023059815e-08, "logits/chosen": -3.2567996978759766, "logits/rejected": -3.0298914909362793, "logps/chosen": -191.20367431640625, "logps/rejected": -272.88104248046875, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": -0.4948158264160156, "rewards/margins": 3.167259693145752, "rewards/rejected": -3.6620755195617676, "step": 8303 }, { "epoch": 0.96, "learning_rate": 1.2993093760973898e-08, "logits/chosen": -3.045347213745117, "logits/rejected": -3.3705902099609375, "logps/chosen": -146.20303344726562, "logps/rejected": -318.67999267578125, "loss": 0.2656, "rewards/accuracies": 0.875, "rewards/chosen": -0.06272351741790771, "rewards/margins": 2.9151148796081543, "rewards/rejected": -2.9778380393981934, "step": 8304 }, { "epoch": 0.96, "learning_rate": 1.2957977291349642e-08, "logits/chosen": -3.063415050506592, "logits/rejected": -3.241237163543701, "logps/chosen": -277.8468933105469, "logps/rejected": -174.09793090820312, "loss": 0.5373, "rewards/accuracies": 0.625, "rewards/chosen": -0.5477906465530396, "rewards/margins": 0.6145612001419067, "rewards/rejected": -1.1623518466949463, "step": 8305 }, { "epoch": 0.96, "learning_rate": 1.2922860821725387e-08, "logits/chosen": -3.0128707885742188, "logits/rejected": -3.206698179244995, "logps/chosen": -138.36163330078125, "logps/rejected": -272.8291015625, "loss": 0.4451, "rewards/accuracies": 0.75, "rewards/chosen": -0.18411535024642944, "rewards/margins": 2.2222652435302734, "rewards/rejected": -2.4063804149627686, "step": 8306 }, { "epoch": 0.96, "learning_rate": 1.2887744352101134e-08, "logits/chosen": -3.059342384338379, "logits/rejected": -3.111412286758423, "logps/chosen": -124.45681762695312, "logps/rejected": -197.85507202148438, "loss": 0.3832, "rewards/accuracies": 0.875, "rewards/chosen": -0.37650084495544434, "rewards/margins": 1.5740798711776733, "rewards/rejected": -1.9505807161331177, "step": 8307 }, { "epoch": 0.96, "learning_rate": 1.285262788247688e-08, "logits/chosen": -3.0233023166656494, "logits/rejected": -2.9006476402282715, "logps/chosen": -239.59161376953125, "logps/rejected": -383.79150390625, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": 0.2367362231016159, "rewards/margins": 3.467280626296997, "rewards/rejected": -3.230544328689575, "step": 8308 }, { "epoch": 0.96, "learning_rate": 1.2817511412852627e-08, "logits/chosen": -3.470857620239258, "logits/rejected": -3.424672842025757, "logps/chosen": -210.60536193847656, "logps/rejected": -171.46722412109375, "loss": 0.331, "rewards/accuracies": 1.0, "rewards/chosen": -0.15453150868415833, "rewards/margins": 1.0666377544403076, "rewards/rejected": -1.2211692333221436, "step": 8309 }, { "epoch": 0.96, "learning_rate": 1.2782394943228373e-08, "logits/chosen": -2.7635385990142822, "logits/rejected": -2.736882448196411, "logps/chosen": -296.8058776855469, "logps/rejected": -227.1547088623047, "loss": 0.2848, "rewards/accuracies": 0.875, "rewards/chosen": -0.2156868875026703, "rewards/margins": 1.8027162551879883, "rewards/rejected": -2.0184030532836914, "step": 8310 }, { "epoch": 0.96, "learning_rate": 1.274727847360412e-08, "logits/chosen": -2.369112730026245, "logits/rejected": -2.338441848754883, "logps/chosen": -425.47381591796875, "logps/rejected": -286.86572265625, "loss": 0.4302, "rewards/accuracies": 0.875, "rewards/chosen": -0.0032943710684776306, "rewards/margins": 1.1478815078735352, "rewards/rejected": -1.1511757373809814, "step": 8311 }, { "epoch": 0.96, "learning_rate": 1.2712162003979866e-08, "logits/chosen": -3.441132068634033, "logits/rejected": -3.3137896060943604, "logps/chosen": -318.3377380371094, "logps/rejected": -438.000732421875, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": 0.08224048465490341, "rewards/margins": 3.488095283508301, "rewards/rejected": -3.4058547019958496, "step": 8312 }, { "epoch": 0.96, "learning_rate": 1.2677045534355611e-08, "logits/chosen": -4.161600112915039, "logits/rejected": -3.9250497817993164, "logps/chosen": -161.736572265625, "logps/rejected": -188.98956298828125, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": 0.3939509689807892, "rewards/margins": 3.1528501510620117, "rewards/rejected": -2.758898973464966, "step": 8313 }, { "epoch": 0.96, "learning_rate": 1.2641929064731358e-08, "logits/chosen": -3.1986632347106934, "logits/rejected": -2.898909330368042, "logps/chosen": -221.6466064453125, "logps/rejected": -174.53518676757812, "loss": 0.3667, "rewards/accuracies": 0.875, "rewards/chosen": -0.3208311200141907, "rewards/margins": 1.4786393642425537, "rewards/rejected": -1.7994705438613892, "step": 8314 }, { "epoch": 0.96, "learning_rate": 1.2606812595107104e-08, "logits/chosen": -3.0905911922454834, "logits/rejected": -3.028481960296631, "logps/chosen": -213.78160095214844, "logps/rejected": -174.30029296875, "loss": 0.5756, "rewards/accuracies": 0.75, "rewards/chosen": 0.017976611852645874, "rewards/margins": 0.6575988531112671, "rewards/rejected": -0.6396222710609436, "step": 8315 }, { "epoch": 0.96, "learning_rate": 1.2571696125482851e-08, "logits/chosen": -3.1392083168029785, "logits/rejected": -2.5463080406188965, "logps/chosen": -284.0442199707031, "logps/rejected": -200.0133056640625, "loss": 0.3426, "rewards/accuracies": 0.875, "rewards/chosen": -0.029979363083839417, "rewards/margins": 2.201843500137329, "rewards/rejected": -2.231822967529297, "step": 8316 }, { "epoch": 0.96, "learning_rate": 1.2536579655858597e-08, "logits/chosen": -3.2021946907043457, "logits/rejected": -3.441253662109375, "logps/chosen": -319.3117980957031, "logps/rejected": -217.3712921142578, "loss": 0.317, "rewards/accuracies": 0.75, "rewards/chosen": -0.35259026288986206, "rewards/margins": 1.7494137287139893, "rewards/rejected": -2.102004051208496, "step": 8317 }, { "epoch": 0.96, "learning_rate": 1.2501463186234344e-08, "logits/chosen": -3.041901111602783, "logits/rejected": -3.025028944015503, "logps/chosen": -350.5223693847656, "logps/rejected": -336.04461669921875, "loss": 0.3742, "rewards/accuracies": 0.75, "rewards/chosen": -0.4548894166946411, "rewards/margins": 1.636356234550476, "rewards/rejected": -2.091245651245117, "step": 8318 }, { "epoch": 0.96, "learning_rate": 1.246634671661009e-08, "logits/chosen": -2.7568929195404053, "logits/rejected": -2.3521616458892822, "logps/chosen": -406.0611267089844, "logps/rejected": -367.01959228515625, "loss": 0.273, "rewards/accuracies": 1.0, "rewards/chosen": -0.9415393471717834, "rewards/margins": 1.5403416156768799, "rewards/rejected": -2.4818809032440186, "step": 8319 }, { "epoch": 0.96, "learning_rate": 1.2431230246985837e-08, "logits/chosen": -3.176500082015991, "logits/rejected": -2.8431355953216553, "logps/chosen": -277.8806457519531, "logps/rejected": -300.62255859375, "loss": 0.2113, "rewards/accuracies": 1.0, "rewards/chosen": 0.9580151438713074, "rewards/margins": 1.8921281099319458, "rewards/rejected": -0.9341130256652832, "step": 8320 }, { "epoch": 0.96, "learning_rate": 1.2396113777361583e-08, "logits/chosen": -2.654128074645996, "logits/rejected": -2.975630044937134, "logps/chosen": -203.5262451171875, "logps/rejected": -349.23101806640625, "loss": 0.3084, "rewards/accuracies": 0.875, "rewards/chosen": -0.13941937685012817, "rewards/margins": 1.5806442499160767, "rewards/rejected": -1.7200636863708496, "step": 8321 }, { "epoch": 0.96, "learning_rate": 1.2360997307737328e-08, "logits/chosen": -3.2189950942993164, "logits/rejected": -3.1958065032958984, "logps/chosen": -440.855224609375, "logps/rejected": -236.7337646484375, "loss": 0.3253, "rewards/accuracies": 0.875, "rewards/chosen": -0.12371267378330231, "rewards/margins": 1.5814650058746338, "rewards/rejected": -1.705177664756775, "step": 8322 }, { "epoch": 0.96, "learning_rate": 1.2325880838113075e-08, "logits/chosen": -3.2147018909454346, "logits/rejected": -3.2391600608825684, "logps/chosen": -179.22091674804688, "logps/rejected": -287.8453674316406, "loss": 0.2351, "rewards/accuracies": 0.875, "rewards/chosen": -0.134456068277359, "rewards/margins": 2.670729637145996, "rewards/rejected": -2.805185556411743, "step": 8323 }, { "epoch": 0.96, "learning_rate": 1.2290764368488821e-08, "logits/chosen": -3.6570916175842285, "logits/rejected": -3.3298940658569336, "logps/chosen": -447.36474609375, "logps/rejected": -247.44729614257812, "loss": 0.824, "rewards/accuracies": 0.625, "rewards/chosen": -1.151455283164978, "rewards/margins": 0.45083773136138916, "rewards/rejected": -1.6022930145263672, "step": 8324 }, { "epoch": 0.96, "learning_rate": 1.2255647898864568e-08, "logits/chosen": -2.811570644378662, "logits/rejected": -2.5945653915405273, "logps/chosen": -232.38682556152344, "logps/rejected": -239.2474365234375, "loss": 0.3294, "rewards/accuracies": 0.875, "rewards/chosen": -0.06838984787464142, "rewards/margins": 1.4631593227386475, "rewards/rejected": -1.531549096107483, "step": 8325 }, { "epoch": 0.96, "learning_rate": 1.2220531429240314e-08, "logits/chosen": -2.777160167694092, "logits/rejected": -3.104778289794922, "logps/chosen": -231.21397399902344, "logps/rejected": -179.45834350585938, "loss": 0.4056, "rewards/accuracies": 0.75, "rewards/chosen": -0.22073610126972198, "rewards/margins": 1.982387900352478, "rewards/rejected": -2.2031240463256836, "step": 8326 }, { "epoch": 0.96, "learning_rate": 1.2185414959616061e-08, "logits/chosen": -3.577101230621338, "logits/rejected": -3.4305624961853027, "logps/chosen": -349.3119201660156, "logps/rejected": -312.13336181640625, "loss": 0.2981, "rewards/accuracies": 0.875, "rewards/chosen": 0.19576379656791687, "rewards/margins": 2.8408281803131104, "rewards/rejected": -2.64506459236145, "step": 8327 }, { "epoch": 0.96, "learning_rate": 1.2150298489991805e-08, "logits/chosen": -2.835967540740967, "logits/rejected": -2.773076057434082, "logps/chosen": -263.3528747558594, "logps/rejected": -239.9281005859375, "loss": 0.4249, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023580014705658, "rewards/margins": 1.5369768142700195, "rewards/rejected": -1.8393347263336182, "step": 8328 }, { "epoch": 0.96, "learning_rate": 1.211518202036755e-08, "logits/chosen": -2.464341640472412, "logits/rejected": -2.4652998447418213, "logps/chosen": -247.4571533203125, "logps/rejected": -213.1072998046875, "loss": 0.3355, "rewards/accuracies": 0.875, "rewards/chosen": 0.08892417699098587, "rewards/margins": 1.4855237007141113, "rewards/rejected": -1.3965994119644165, "step": 8329 }, { "epoch": 0.96, "learning_rate": 1.2080065550743298e-08, "logits/chosen": -2.708432912826538, "logits/rejected": -2.8787789344787598, "logps/chosen": -282.4986877441406, "logps/rejected": -307.5788879394531, "loss": 0.2318, "rewards/accuracies": 0.875, "rewards/chosen": 0.16228751838207245, "rewards/margins": 2.3172192573547363, "rewards/rejected": -2.1549320220947266, "step": 8330 }, { "epoch": 0.96, "learning_rate": 1.2044949081119043e-08, "logits/chosen": -2.7748122215270996, "logits/rejected": -2.8240559101104736, "logps/chosen": -340.56878662109375, "logps/rejected": -259.9059753417969, "loss": 0.2616, "rewards/accuracies": 0.875, "rewards/chosen": -0.26199257373809814, "rewards/margins": 2.4334537982940674, "rewards/rejected": -2.695446252822876, "step": 8331 }, { "epoch": 0.96, "learning_rate": 1.200983261149479e-08, "logits/chosen": -3.7430453300476074, "logits/rejected": -3.0116302967071533, "logps/chosen": -278.8753967285156, "logps/rejected": -256.9015197753906, "loss": 0.081, "rewards/accuracies": 1.0, "rewards/chosen": 0.4746891260147095, "rewards/margins": 3.7006633281707764, "rewards/rejected": -3.2259743213653564, "step": 8332 }, { "epoch": 0.96, "learning_rate": 1.1974716141870536e-08, "logits/chosen": -2.8136937618255615, "logits/rejected": -2.932187557220459, "logps/chosen": -193.53054809570312, "logps/rejected": -277.6652526855469, "loss": 0.8477, "rewards/accuracies": 0.625, "rewards/chosen": -0.5571325421333313, "rewards/margins": 0.7924224734306335, "rewards/rejected": -1.3495550155639648, "step": 8333 }, { "epoch": 0.96, "learning_rate": 1.1939599672246282e-08, "logits/chosen": -3.102844715118408, "logits/rejected": -2.9656200408935547, "logps/chosen": -176.94561767578125, "logps/rejected": -175.3705596923828, "loss": 0.5596, "rewards/accuracies": 0.75, "rewards/chosen": -0.001324981451034546, "rewards/margins": 1.0902819633483887, "rewards/rejected": -1.091606855392456, "step": 8334 }, { "epoch": 0.96, "learning_rate": 1.1904483202622029e-08, "logits/chosen": -3.206015110015869, "logits/rejected": -3.195622444152832, "logps/chosen": -317.5715637207031, "logps/rejected": -315.10809326171875, "loss": 0.4295, "rewards/accuracies": 0.875, "rewards/chosen": 0.36299842596054077, "rewards/margins": 3.121387481689453, "rewards/rejected": -2.7583889961242676, "step": 8335 }, { "epoch": 0.96, "learning_rate": 1.1869366732997775e-08, "logits/chosen": -2.3920087814331055, "logits/rejected": -2.4658753871917725, "logps/chosen": -363.11676025390625, "logps/rejected": -336.82537841796875, "loss": 0.4258, "rewards/accuracies": 0.875, "rewards/chosen": -0.35463541746139526, "rewards/margins": 0.9706010818481445, "rewards/rejected": -1.325236439704895, "step": 8336 }, { "epoch": 0.96, "learning_rate": 1.1834250263373522e-08, "logits/chosen": -3.0141780376434326, "logits/rejected": -3.168233633041382, "logps/chosen": -174.62091064453125, "logps/rejected": -146.85549926757812, "loss": 0.3996, "rewards/accuracies": 0.75, "rewards/chosen": -0.13578039407730103, "rewards/margins": 1.2334873676300049, "rewards/rejected": -1.3692677021026611, "step": 8337 }, { "epoch": 0.96, "learning_rate": 1.1799133793749267e-08, "logits/chosen": -3.2313952445983887, "logits/rejected": -2.9742326736450195, "logps/chosen": -258.1947021484375, "logps/rejected": -216.05862426757812, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": -0.17683231830596924, "rewards/margins": 1.7335424423217773, "rewards/rejected": -1.9103747606277466, "step": 8338 }, { "epoch": 0.96, "learning_rate": 1.1764017324125015e-08, "logits/chosen": -2.680307388305664, "logits/rejected": -2.6267452239990234, "logps/chosen": -322.4468688964844, "logps/rejected": -247.83578491210938, "loss": 0.6144, "rewards/accuracies": 0.75, "rewards/chosen": -0.34582751989364624, "rewards/margins": 1.000154972076416, "rewards/rejected": -1.345982551574707, "step": 8339 }, { "epoch": 0.96, "learning_rate": 1.172890085450076e-08, "logits/chosen": -2.6219286918640137, "logits/rejected": -2.457559585571289, "logps/chosen": -348.919921875, "logps/rejected": -393.7067565917969, "loss": 0.4312, "rewards/accuracies": 0.625, "rewards/chosen": 0.3196490406990051, "rewards/margins": 1.2903155088424683, "rewards/rejected": -0.9706665277481079, "step": 8340 }, { "epoch": 0.96, "learning_rate": 1.1693784384876507e-08, "logits/chosen": -3.399691104888916, "logits/rejected": -3.024003028869629, "logps/chosen": -279.5122985839844, "logps/rejected": -320.81689453125, "loss": 0.7154, "rewards/accuracies": 0.625, "rewards/chosen": -0.9422402381896973, "rewards/margins": 0.8962936997413635, "rewards/rejected": -1.8385341167449951, "step": 8341 }, { "epoch": 0.96, "learning_rate": 1.1658667915252253e-08, "logits/chosen": -3.774261474609375, "logits/rejected": -3.656068801879883, "logps/chosen": -306.2177734375, "logps/rejected": -232.972412109375, "loss": 0.3329, "rewards/accuracies": 0.875, "rewards/chosen": -0.10800585895776749, "rewards/margins": 1.2873046398162842, "rewards/rejected": -1.395310401916504, "step": 8342 }, { "epoch": 0.96, "learning_rate": 1.1623551445627999e-08, "logits/chosen": -3.7829084396362305, "logits/rejected": -3.6244254112243652, "logps/chosen": -315.57373046875, "logps/rejected": -192.29306030273438, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": 0.6309095025062561, "rewards/margins": 2.854215145111084, "rewards/rejected": -2.2233057022094727, "step": 8343 }, { "epoch": 0.96, "learning_rate": 1.1588434976003746e-08, "logits/chosen": -3.070521831512451, "logits/rejected": -3.0306851863861084, "logps/chosen": -246.73489379882812, "logps/rejected": -369.80255126953125, "loss": 0.2053, "rewards/accuracies": 0.875, "rewards/chosen": 0.4843379259109497, "rewards/margins": 3.3994319438934326, "rewards/rejected": -2.9150938987731934, "step": 8344 }, { "epoch": 0.96, "learning_rate": 1.1553318506379491e-08, "logits/chosen": -3.410555362701416, "logits/rejected": -3.197970390319824, "logps/chosen": -193.8603973388672, "logps/rejected": -261.99444580078125, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": 0.2860303223133087, "rewards/margins": 2.784748077392578, "rewards/rejected": -2.498717784881592, "step": 8345 }, { "epoch": 0.96, "learning_rate": 1.1518202036755239e-08, "logits/chosen": -3.625643253326416, "logits/rejected": -3.4338743686676025, "logps/chosen": -353.9469299316406, "logps/rejected": -293.3583068847656, "loss": 0.4205, "rewards/accuracies": 0.75, "rewards/chosen": -0.3488956093788147, "rewards/margins": 1.209123969078064, "rewards/rejected": -1.5580196380615234, "step": 8346 }, { "epoch": 0.96, "learning_rate": 1.1483085567130984e-08, "logits/chosen": -2.511627674102783, "logits/rejected": -2.481801986694336, "logps/chosen": -365.9727478027344, "logps/rejected": -220.04505920410156, "loss": 0.337, "rewards/accuracies": 1.0, "rewards/chosen": 0.231556698679924, "rewards/margins": 1.361771583557129, "rewards/rejected": -1.1302149295806885, "step": 8347 }, { "epoch": 0.96, "learning_rate": 1.1447969097506731e-08, "logits/chosen": -4.0744781494140625, "logits/rejected": -3.7220377922058105, "logps/chosen": -269.595947265625, "logps/rejected": -299.44085693359375, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": 0.20033805072307587, "rewards/margins": 2.318641424179077, "rewards/rejected": -2.1183032989501953, "step": 8348 }, { "epoch": 0.96, "learning_rate": 1.1412852627882477e-08, "logits/chosen": -3.3912301063537598, "logits/rejected": -3.3960955142974854, "logps/chosen": -148.5876922607422, "logps/rejected": -186.01873779296875, "loss": 0.3327, "rewards/accuracies": 0.75, "rewards/chosen": 0.19692040979862213, "rewards/margins": 2.4906721115112305, "rewards/rejected": -2.2937517166137695, "step": 8349 }, { "epoch": 0.96, "learning_rate": 1.1377736158258224e-08, "logits/chosen": -3.225585699081421, "logits/rejected": -2.8741447925567627, "logps/chosen": -248.22134399414062, "logps/rejected": -305.0395812988281, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -0.1085050106048584, "rewards/margins": 2.3262534141540527, "rewards/rejected": -2.434758424758911, "step": 8350 }, { "epoch": 0.96, "learning_rate": 1.134261968863397e-08, "logits/chosen": -3.4938855171203613, "logits/rejected": -3.4918696880340576, "logps/chosen": -244.40086364746094, "logps/rejected": -293.53936767578125, "loss": 0.2739, "rewards/accuracies": 0.875, "rewards/chosen": 0.8410211801528931, "rewards/margins": 2.875941514968872, "rewards/rejected": -2.0349202156066895, "step": 8351 }, { "epoch": 0.96, "learning_rate": 1.1307503219009714e-08, "logits/chosen": -3.9203619956970215, "logits/rejected": -3.691798210144043, "logps/chosen": -342.06585693359375, "logps/rejected": -302.5403747558594, "loss": 0.327, "rewards/accuracies": 0.75, "rewards/chosen": 0.32267117500305176, "rewards/margins": 1.7062797546386719, "rewards/rejected": -1.3836085796356201, "step": 8352 }, { "epoch": 0.96, "learning_rate": 1.1272386749385461e-08, "logits/chosen": -3.4015979766845703, "logits/rejected": -3.3749566078186035, "logps/chosen": -235.3915252685547, "logps/rejected": -237.5419464111328, "loss": 0.7968, "rewards/accuracies": 0.625, "rewards/chosen": 0.13905665278434753, "rewards/margins": 0.7795342803001404, "rewards/rejected": -0.6404776573181152, "step": 8353 }, { "epoch": 0.96, "learning_rate": 1.1237270279761207e-08, "logits/chosen": -3.5383455753326416, "logits/rejected": -3.5388622283935547, "logps/chosen": -347.6019287109375, "logps/rejected": -255.0184783935547, "loss": 0.4122, "rewards/accuracies": 0.875, "rewards/chosen": -0.306110680103302, "rewards/margins": 1.0887365341186523, "rewards/rejected": -1.3948472738265991, "step": 8354 }, { "epoch": 0.96, "learning_rate": 1.1202153810136952e-08, "logits/chosen": -3.7726616859436035, "logits/rejected": -3.780094623565674, "logps/chosen": -336.2822265625, "logps/rejected": -294.748046875, "loss": 0.2887, "rewards/accuracies": 0.875, "rewards/chosen": 0.22648678719997406, "rewards/margins": 2.163719892501831, "rewards/rejected": -1.9372328519821167, "step": 8355 }, { "epoch": 0.96, "learning_rate": 1.11670373405127e-08, "logits/chosen": -3.0075392723083496, "logits/rejected": -2.889493703842163, "logps/chosen": -204.1392822265625, "logps/rejected": -262.16455078125, "loss": 0.264, "rewards/accuracies": 0.875, "rewards/chosen": -0.10321972519159317, "rewards/margins": 1.9211444854736328, "rewards/rejected": -2.0243642330169678, "step": 8356 }, { "epoch": 0.96, "learning_rate": 1.1131920870888445e-08, "logits/chosen": -3.42621111869812, "logits/rejected": -3.34328556060791, "logps/chosen": -259.737548828125, "logps/rejected": -293.0054016113281, "loss": 0.1706, "rewards/accuracies": 0.875, "rewards/chosen": 0.3775869309902191, "rewards/margins": 3.2843382358551025, "rewards/rejected": -2.9067511558532715, "step": 8357 }, { "epoch": 0.96, "learning_rate": 1.1096804401264192e-08, "logits/chosen": -3.051287889480591, "logits/rejected": -2.8661699295043945, "logps/chosen": -206.09030151367188, "logps/rejected": -262.78558349609375, "loss": 0.5994, "rewards/accuracies": 0.75, "rewards/chosen": -0.2342820018529892, "rewards/margins": 0.9157870411872864, "rewards/rejected": -1.150068998336792, "step": 8358 }, { "epoch": 0.96, "learning_rate": 1.1061687931639938e-08, "logits/chosen": -2.573491096496582, "logits/rejected": -2.6437511444091797, "logps/chosen": -397.5391845703125, "logps/rejected": -514.5775756835938, "loss": 0.6022, "rewards/accuracies": 0.875, "rewards/chosen": -0.8454912900924683, "rewards/margins": 1.334378719329834, "rewards/rejected": -2.179870128631592, "step": 8359 }, { "epoch": 0.96, "learning_rate": 1.1026571462015685e-08, "logits/chosen": -3.6013875007629395, "logits/rejected": -3.5567262172698975, "logps/chosen": -368.9732666015625, "logps/rejected": -291.4639892578125, "loss": 0.6285, "rewards/accuracies": 0.625, "rewards/chosen": -0.5808574557304382, "rewards/margins": 1.3060407638549805, "rewards/rejected": -1.8868982791900635, "step": 8360 }, { "epoch": 0.96, "learning_rate": 1.099145499239143e-08, "logits/chosen": -3.1787893772125244, "logits/rejected": -3.1560988426208496, "logps/chosen": -130.888427734375, "logps/rejected": -112.83800506591797, "loss": 0.516, "rewards/accuracies": 0.75, "rewards/chosen": -0.4592573046684265, "rewards/margins": 0.6163191199302673, "rewards/rejected": -1.0755764245986938, "step": 8361 }, { "epoch": 0.96, "learning_rate": 1.0956338522767178e-08, "logits/chosen": -3.6684792041778564, "logits/rejected": -3.139749526977539, "logps/chosen": -264.620361328125, "logps/rejected": -331.8872985839844, "loss": 0.4216, "rewards/accuracies": 0.625, "rewards/chosen": -0.11644697189331055, "rewards/margins": 1.5230236053466797, "rewards/rejected": -1.6394708156585693, "step": 8362 }, { "epoch": 0.96, "learning_rate": 1.0921222053142923e-08, "logits/chosen": -2.487468719482422, "logits/rejected": -2.2279136180877686, "logps/chosen": -245.96463012695312, "logps/rejected": -182.60719299316406, "loss": 0.3208, "rewards/accuracies": 0.875, "rewards/chosen": 0.5240422487258911, "rewards/margins": 1.7499158382415771, "rewards/rejected": -1.2258737087249756, "step": 8363 }, { "epoch": 0.96, "learning_rate": 1.0886105583518669e-08, "logits/chosen": -3.7783703804016113, "logits/rejected": -3.609898567199707, "logps/chosen": -172.27752685546875, "logps/rejected": -235.08804321289062, "loss": 0.3084, "rewards/accuracies": 0.875, "rewards/chosen": -0.15288373827934265, "rewards/margins": 3.59371018409729, "rewards/rejected": -3.746593713760376, "step": 8364 }, { "epoch": 0.96, "learning_rate": 1.0850989113894416e-08, "logits/chosen": -3.3761239051818848, "logits/rejected": -3.313227653503418, "logps/chosen": -193.20167541503906, "logps/rejected": -186.2373046875, "loss": 0.2262, "rewards/accuracies": 1.0, "rewards/chosen": -0.11200673878192902, "rewards/margins": 2.6004879474639893, "rewards/rejected": -2.7124948501586914, "step": 8365 }, { "epoch": 0.96, "learning_rate": 1.0815872644270162e-08, "logits/chosen": -3.0463600158691406, "logits/rejected": -3.4370076656341553, "logps/chosen": -337.37091064453125, "logps/rejected": -263.44781494140625, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": 0.2181866466999054, "rewards/margins": 2.9932994842529297, "rewards/rejected": -2.775113105773926, "step": 8366 }, { "epoch": 0.96, "learning_rate": 1.0780756174645909e-08, "logits/chosen": -3.1007347106933594, "logits/rejected": -3.3043954372406006, "logps/chosen": -233.0471954345703, "logps/rejected": -326.2756042480469, "loss": 0.4307, "rewards/accuracies": 0.75, "rewards/chosen": -0.47641295194625854, "rewards/margins": 2.3825886249542236, "rewards/rejected": -2.859001398086548, "step": 8367 }, { "epoch": 0.96, "learning_rate": 1.0745639705021655e-08, "logits/chosen": -3.105009078979492, "logits/rejected": -3.1570119857788086, "logps/chosen": -180.45680236816406, "logps/rejected": -175.01531982421875, "loss": 0.3782, "rewards/accuracies": 0.75, "rewards/chosen": -0.33033281564712524, "rewards/margins": 1.8355581760406494, "rewards/rejected": -2.16589093208313, "step": 8368 }, { "epoch": 0.96, "learning_rate": 1.0710523235397402e-08, "logits/chosen": -3.549992561340332, "logits/rejected": -3.5817532539367676, "logps/chosen": -392.5830383300781, "logps/rejected": -302.9142150878906, "loss": 0.4062, "rewards/accuracies": 0.75, "rewards/chosen": -0.6391738653182983, "rewards/margins": 1.4591518640518188, "rewards/rejected": -2.098325729370117, "step": 8369 }, { "epoch": 0.96, "learning_rate": 1.0675406765773148e-08, "logits/chosen": -3.0783042907714844, "logits/rejected": -2.7937545776367188, "logps/chosen": -311.4524230957031, "logps/rejected": -205.97613525390625, "loss": 0.351, "rewards/accuracies": 0.875, "rewards/chosen": 0.34792596101760864, "rewards/margins": 1.4021936655044556, "rewards/rejected": -1.0542676448822021, "step": 8370 }, { "epoch": 0.97, "learning_rate": 1.0640290296148895e-08, "logits/chosen": -2.605991840362549, "logits/rejected": -2.6813442707061768, "logps/chosen": -238.73785400390625, "logps/rejected": -432.681884765625, "loss": 0.3365, "rewards/accuracies": 0.875, "rewards/chosen": -0.24159081280231476, "rewards/margins": 1.2608883380889893, "rewards/rejected": -1.5024791955947876, "step": 8371 }, { "epoch": 0.97, "learning_rate": 1.060517382652464e-08, "logits/chosen": -2.108330249786377, "logits/rejected": -2.2844552993774414, "logps/chosen": -233.19692993164062, "logps/rejected": -173.91851806640625, "loss": 0.5369, "rewards/accuracies": 0.75, "rewards/chosen": -0.5502626299858093, "rewards/margins": 1.0513514280319214, "rewards/rejected": -1.6016141176223755, "step": 8372 }, { "epoch": 0.97, "learning_rate": 1.0570057356900386e-08, "logits/chosen": -3.068117618560791, "logits/rejected": -3.18377947807312, "logps/chosen": -229.62010192871094, "logps/rejected": -221.99391174316406, "loss": 0.4541, "rewards/accuracies": 0.875, "rewards/chosen": 0.0775289461016655, "rewards/margins": 1.1989206075668335, "rewards/rejected": -1.1213915348052979, "step": 8373 }, { "epoch": 0.97, "learning_rate": 1.0534940887276133e-08, "logits/chosen": -3.1026909351348877, "logits/rejected": -2.8794331550598145, "logps/chosen": -247.34310913085938, "logps/rejected": -196.1335906982422, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -0.20142866671085358, "rewards/margins": 1.7865043878555298, "rewards/rejected": -1.9879331588745117, "step": 8374 }, { "epoch": 0.97, "learning_rate": 1.0499824417651879e-08, "logits/chosen": -3.6565980911254883, "logits/rejected": -3.838531970977783, "logps/chosen": -185.8131103515625, "logps/rejected": -214.59463500976562, "loss": 0.2687, "rewards/accuracies": 0.875, "rewards/chosen": -0.14594735205173492, "rewards/margins": 2.695552349090576, "rewards/rejected": -2.8414995670318604, "step": 8375 }, { "epoch": 0.97, "learning_rate": 1.0464707948027623e-08, "logits/chosen": -2.5613412857055664, "logits/rejected": -2.4179847240448, "logps/chosen": -243.0853271484375, "logps/rejected": -305.832275390625, "loss": 0.4106, "rewards/accuracies": 0.75, "rewards/chosen": 0.0027925558388233185, "rewards/margins": 1.4293853044509888, "rewards/rejected": -1.4265927076339722, "step": 8376 }, { "epoch": 0.97, "learning_rate": 1.042959147840337e-08, "logits/chosen": -3.6622071266174316, "logits/rejected": -3.0762760639190674, "logps/chosen": -273.20440673828125, "logps/rejected": -298.4071960449219, "loss": 0.2073, "rewards/accuracies": 1.0, "rewards/chosen": 0.29549628496170044, "rewards/margins": 1.886146068572998, "rewards/rejected": -1.5906498432159424, "step": 8377 }, { "epoch": 0.97, "learning_rate": 1.0394475008779116e-08, "logits/chosen": -3.62972354888916, "logits/rejected": -3.817674160003662, "logps/chosen": -198.19970703125, "logps/rejected": -224.87962341308594, "loss": 0.4837, "rewards/accuracies": 0.75, "rewards/chosen": -0.22127294540405273, "rewards/margins": 0.6773828864097595, "rewards/rejected": -0.8986558318138123, "step": 8378 }, { "epoch": 0.97, "learning_rate": 1.0359358539154863e-08, "logits/chosen": -2.6171622276306152, "logits/rejected": -2.8154525756835938, "logps/chosen": -308.27154541015625, "logps/rejected": -436.47216796875, "loss": 0.6863, "rewards/accuracies": 0.625, "rewards/chosen": -0.27204954624176025, "rewards/margins": 1.6139622926712036, "rewards/rejected": -1.8860119581222534, "step": 8379 }, { "epoch": 0.97, "learning_rate": 1.0324242069530608e-08, "logits/chosen": -2.565946340560913, "logits/rejected": -2.5145270824432373, "logps/chosen": -326.9057922363281, "logps/rejected": -359.2528381347656, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": 0.6061733961105347, "rewards/margins": 1.484626293182373, "rewards/rejected": -0.8784528970718384, "step": 8380 }, { "epoch": 0.97, "learning_rate": 1.0289125599906356e-08, "logits/chosen": -3.339923620223999, "logits/rejected": -3.753171443939209, "logps/chosen": -254.01025390625, "logps/rejected": -231.7086639404297, "loss": 0.1992, "rewards/accuracies": 1.0, "rewards/chosen": 0.16549429297447205, "rewards/margins": 1.8804755210876465, "rewards/rejected": -1.714981198310852, "step": 8381 }, { "epoch": 0.97, "learning_rate": 1.0254009130282101e-08, "logits/chosen": -2.5662381649017334, "logits/rejected": -2.845761299133301, "logps/chosen": -504.08160400390625, "logps/rejected": -428.8846435546875, "loss": 0.1814, "rewards/accuracies": 0.875, "rewards/chosen": 0.3760469853878021, "rewards/margins": 4.12640380859375, "rewards/rejected": -3.750357151031494, "step": 8382 }, { "epoch": 0.97, "learning_rate": 1.0218892660657848e-08, "logits/chosen": -3.310889482498169, "logits/rejected": -3.2768239974975586, "logps/chosen": -116.40216827392578, "logps/rejected": -250.23040771484375, "loss": 0.21, "rewards/accuracies": 1.0, "rewards/chosen": -0.1205882728099823, "rewards/margins": 3.0165646076202393, "rewards/rejected": -3.137152910232544, "step": 8383 }, { "epoch": 0.97, "learning_rate": 1.0183776191033594e-08, "logits/chosen": -3.018362283706665, "logits/rejected": -3.2070484161376953, "logps/chosen": -318.85302734375, "logps/rejected": -212.66879272460938, "loss": 0.5901, "rewards/accuracies": 0.75, "rewards/chosen": 0.039605528116226196, "rewards/margins": 0.7976378202438354, "rewards/rejected": -0.7580322623252869, "step": 8384 }, { "epoch": 0.97, "learning_rate": 1.014865972140934e-08, "logits/chosen": -3.3908605575561523, "logits/rejected": -3.7795255184173584, "logps/chosen": -236.3668212890625, "logps/rejected": -167.8939208984375, "loss": 0.3111, "rewards/accuracies": 0.875, "rewards/chosen": -0.45229461789131165, "rewards/margins": 1.2911782264709473, "rewards/rejected": -1.7434728145599365, "step": 8385 }, { "epoch": 0.97, "learning_rate": 1.0113543251785087e-08, "logits/chosen": -3.527090072631836, "logits/rejected": -3.62808895111084, "logps/chosen": -273.10345458984375, "logps/rejected": -232.60101318359375, "loss": 0.1276, "rewards/accuracies": 0.875, "rewards/chosen": -0.10846251249313354, "rewards/margins": 2.9995741844177246, "rewards/rejected": -3.108036756515503, "step": 8386 }, { "epoch": 0.97, "learning_rate": 1.0078426782160832e-08, "logits/chosen": -2.821237802505493, "logits/rejected": -2.914905309677124, "logps/chosen": -161.9700164794922, "logps/rejected": -293.0438232421875, "loss": 0.3218, "rewards/accuracies": 0.75, "rewards/chosen": -0.5734564065933228, "rewards/margins": 2.190204381942749, "rewards/rejected": -2.7636609077453613, "step": 8387 }, { "epoch": 0.97, "learning_rate": 1.004331031253658e-08, "logits/chosen": -2.2285447120666504, "logits/rejected": -2.7544503211975098, "logps/chosen": -330.267578125, "logps/rejected": -201.34249877929688, "loss": 0.2548, "rewards/accuracies": 1.0, "rewards/chosen": -0.16118726134300232, "rewards/margins": 1.6354562044143677, "rewards/rejected": -1.7966433763504028, "step": 8388 }, { "epoch": 0.97, "learning_rate": 1.0008193842912325e-08, "logits/chosen": -3.134453296661377, "logits/rejected": -3.1870687007904053, "logps/chosen": -314.43218994140625, "logps/rejected": -210.53684997558594, "loss": 0.3773, "rewards/accuracies": 0.75, "rewards/chosen": -0.5068659782409668, "rewards/margins": 1.5577776432037354, "rewards/rejected": -2.064643621444702, "step": 8389 }, { "epoch": 0.97, "learning_rate": 9.973077373288072e-09, "logits/chosen": -2.7010555267333984, "logits/rejected": -2.987222671508789, "logps/chosen": -194.25979614257812, "logps/rejected": -286.1780090332031, "loss": 0.3275, "rewards/accuracies": 0.625, "rewards/chosen": 0.11472973227500916, "rewards/margins": 3.7130191326141357, "rewards/rejected": -3.5982892513275146, "step": 8390 }, { "epoch": 0.97, "learning_rate": 9.937960903663818e-09, "logits/chosen": -2.8756279945373535, "logits/rejected": -3.021961212158203, "logps/chosen": -377.89208984375, "logps/rejected": -268.6658020019531, "loss": 1.0754, "rewards/accuracies": 0.625, "rewards/chosen": -0.9096782803535461, "rewards/margins": 0.09256502985954285, "rewards/rejected": -1.0022432804107666, "step": 8391 }, { "epoch": 0.97, "learning_rate": 9.902844434039565e-09, "logits/chosen": -2.948106288909912, "logits/rejected": -2.754967212677002, "logps/chosen": -218.20574951171875, "logps/rejected": -273.3581848144531, "loss": 0.4206, "rewards/accuracies": 0.75, "rewards/chosen": -0.5688002705574036, "rewards/margins": 1.4077889919281006, "rewards/rejected": -1.9765892028808594, "step": 8392 }, { "epoch": 0.97, "learning_rate": 9.867727964415311e-09, "logits/chosen": -3.4410223960876465, "logits/rejected": -3.3055577278137207, "logps/chosen": -180.4617462158203, "logps/rejected": -159.66038513183594, "loss": 0.2069, "rewards/accuracies": 0.875, "rewards/chosen": 0.4931867718696594, "rewards/margins": 2.150625467300415, "rewards/rejected": -1.6574387550354004, "step": 8393 }, { "epoch": 0.97, "learning_rate": 9.832611494791056e-09, "logits/chosen": -3.9050979614257812, "logits/rejected": -3.380505084991455, "logps/chosen": -276.4307556152344, "logps/rejected": -315.4677734375, "loss": 0.4282, "rewards/accuracies": 0.875, "rewards/chosen": 0.09048400819301605, "rewards/margins": 1.5715667009353638, "rewards/rejected": -1.4810826778411865, "step": 8394 }, { "epoch": 0.97, "learning_rate": 9.797495025166804e-09, "logits/chosen": -2.486135721206665, "logits/rejected": -2.662494659423828, "logps/chosen": -311.4579772949219, "logps/rejected": -297.58282470703125, "loss": 0.269, "rewards/accuracies": 1.0, "rewards/chosen": -0.15604937076568604, "rewards/margins": 2.055245876312256, "rewards/rejected": -2.2112953662872314, "step": 8395 }, { "epoch": 0.97, "learning_rate": 9.76237855554255e-09, "logits/chosen": -3.2524023056030273, "logits/rejected": -3.174048900604248, "logps/chosen": -208.98583984375, "logps/rejected": -204.2353515625, "loss": 0.3854, "rewards/accuracies": 0.875, "rewards/chosen": -0.14812420308589935, "rewards/margins": 1.4177249670028687, "rewards/rejected": -1.5658491849899292, "step": 8396 }, { "epoch": 0.97, "learning_rate": 9.727262085918296e-09, "logits/chosen": -3.3746867179870605, "logits/rejected": -3.54532790184021, "logps/chosen": -327.8814392089844, "logps/rejected": -311.01055908203125, "loss": 0.3278, "rewards/accuracies": 0.875, "rewards/chosen": 0.3664657175540924, "rewards/margins": 2.320497512817383, "rewards/rejected": -1.9540317058563232, "step": 8397 }, { "epoch": 0.97, "learning_rate": 9.692145616294042e-09, "logits/chosen": -3.7393250465393066, "logits/rejected": -3.021017074584961, "logps/chosen": -347.60504150390625, "logps/rejected": -177.91415405273438, "loss": 0.2521, "rewards/accuracies": 0.875, "rewards/chosen": 5.211681127548218e-05, "rewards/margins": 2.0167319774627686, "rewards/rejected": -2.0166797637939453, "step": 8398 }, { "epoch": 0.97, "learning_rate": 9.65702914666979e-09, "logits/chosen": -2.635904550552368, "logits/rejected": -2.4802560806274414, "logps/chosen": -292.46533203125, "logps/rejected": -302.8505554199219, "loss": 1.06, "rewards/accuracies": 0.25, "rewards/chosen": -1.0918371677398682, "rewards/margins": -0.45846885442733765, "rewards/rejected": -0.6333682537078857, "step": 8399 }, { "epoch": 0.97, "learning_rate": 9.621912677045533e-09, "logits/chosen": -2.6441781520843506, "logits/rejected": -2.659958839416504, "logps/chosen": -166.8682098388672, "logps/rejected": -215.74281311035156, "loss": 0.3947, "rewards/accuracies": 0.75, "rewards/chosen": 0.2554742693901062, "rewards/margins": 2.5519919395446777, "rewards/rejected": -2.2965176105499268, "step": 8400 }, { "epoch": 0.97, "learning_rate": 9.586796207421279e-09, "logits/chosen": -2.9591426849365234, "logits/rejected": -2.8547120094299316, "logps/chosen": -318.00146484375, "logps/rejected": -169.09852600097656, "loss": 0.5803, "rewards/accuracies": 0.5, "rewards/chosen": -0.013365454971790314, "rewards/margins": 1.258200764656067, "rewards/rejected": -1.271566390991211, "step": 8401 }, { "epoch": 0.97, "learning_rate": 9.551679737797026e-09, "logits/chosen": -2.523224115371704, "logits/rejected": -2.5810837745666504, "logps/chosen": -289.3690185546875, "logps/rejected": -243.92283630371094, "loss": 0.4409, "rewards/accuracies": 0.75, "rewards/chosen": -0.5940326452255249, "rewards/margins": 1.1500616073608398, "rewards/rejected": -1.7440943717956543, "step": 8402 }, { "epoch": 0.97, "learning_rate": 9.516563268172772e-09, "logits/chosen": -3.3571534156799316, "logits/rejected": -3.4374966621398926, "logps/chosen": -407.5580749511719, "logps/rejected": -347.18701171875, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": 0.3963181674480438, "rewards/margins": 3.069068431854248, "rewards/rejected": -2.672750234603882, "step": 8403 }, { "epoch": 0.97, "learning_rate": 9.481446798548519e-09, "logits/chosen": -3.0084073543548584, "logits/rejected": -3.371002197265625, "logps/chosen": -140.05503845214844, "logps/rejected": -211.66207885742188, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": 0.4996350407600403, "rewards/margins": 2.620623826980591, "rewards/rejected": -2.1209888458251953, "step": 8404 }, { "epoch": 0.97, "learning_rate": 9.446330328924264e-09, "logits/chosen": -3.064945697784424, "logits/rejected": -3.360159158706665, "logps/chosen": -154.15371704101562, "logps/rejected": -260.14703369140625, "loss": 0.339, "rewards/accuracies": 0.75, "rewards/chosen": 0.032532498240470886, "rewards/margins": 2.2552030086517334, "rewards/rejected": -2.222670555114746, "step": 8405 }, { "epoch": 0.97, "learning_rate": 9.41121385930001e-09, "logits/chosen": -3.442075252532959, "logits/rejected": -3.4038009643554688, "logps/chosen": -281.39080810546875, "logps/rejected": -168.22610473632812, "loss": 0.3338, "rewards/accuracies": 0.875, "rewards/chosen": 0.4891003668308258, "rewards/margins": 1.7336540222167969, "rewards/rejected": -1.244553804397583, "step": 8406 }, { "epoch": 0.97, "learning_rate": 9.376097389675757e-09, "logits/chosen": -2.974752902984619, "logits/rejected": -2.8256354331970215, "logps/chosen": -295.47662353515625, "logps/rejected": -350.033935546875, "loss": 0.4056, "rewards/accuracies": 0.875, "rewards/chosen": -0.4749956727027893, "rewards/margins": 1.2463704347610474, "rewards/rejected": -1.721366047859192, "step": 8407 }, { "epoch": 0.97, "learning_rate": 9.340980920051503e-09, "logits/chosen": -3.551370143890381, "logits/rejected": -3.638317823410034, "logps/chosen": -275.5017395019531, "logps/rejected": -251.56695556640625, "loss": 0.3746, "rewards/accuracies": 0.875, "rewards/chosen": 0.04572046920657158, "rewards/margins": 1.3908506631851196, "rewards/rejected": -1.3451303243637085, "step": 8408 }, { "epoch": 0.97, "learning_rate": 9.30586445042725e-09, "logits/chosen": -3.1304540634155273, "logits/rejected": -3.0111711025238037, "logps/chosen": -158.97024536132812, "logps/rejected": -252.0541534423828, "loss": 0.6097, "rewards/accuracies": 0.75, "rewards/chosen": -0.9647552967071533, "rewards/margins": 0.9240675568580627, "rewards/rejected": -1.8888227939605713, "step": 8409 }, { "epoch": 0.97, "learning_rate": 9.270747980802996e-09, "logits/chosen": -3.0113649368286133, "logits/rejected": -3.0797882080078125, "logps/chosen": -269.68304443359375, "logps/rejected": -281.2015380859375, "loss": 0.2866, "rewards/accuracies": 0.875, "rewards/chosen": -0.06046495586633682, "rewards/margins": 1.7535547018051147, "rewards/rejected": -1.8140195608139038, "step": 8410 }, { "epoch": 0.97, "learning_rate": 9.235631511178743e-09, "logits/chosen": -3.62265682220459, "logits/rejected": -3.701540946960449, "logps/chosen": -341.08233642578125, "logps/rejected": -313.3612976074219, "loss": 0.1735, "rewards/accuracies": 1.0, "rewards/chosen": 0.3652455508708954, "rewards/margins": 2.9969892501831055, "rewards/rejected": -2.631743907928467, "step": 8411 }, { "epoch": 0.97, "learning_rate": 9.200515041554488e-09, "logits/chosen": -1.7785189151763916, "logits/rejected": -2.0118889808654785, "logps/chosen": -262.4716796875, "logps/rejected": -277.7813720703125, "loss": 0.4971, "rewards/accuracies": 0.75, "rewards/chosen": -0.2938191294670105, "rewards/margins": 0.9168039560317993, "rewards/rejected": -1.2106231451034546, "step": 8412 }, { "epoch": 0.97, "learning_rate": 9.165398571930236e-09, "logits/chosen": -3.9835095405578613, "logits/rejected": -4.1817097663879395, "logps/chosen": -284.0061950683594, "logps/rejected": -239.89694213867188, "loss": 0.2847, "rewards/accuracies": 0.875, "rewards/chosen": -0.5119163990020752, "rewards/margins": 1.835291862487793, "rewards/rejected": -2.3472084999084473, "step": 8413 }, { "epoch": 0.97, "learning_rate": 9.130282102305981e-09, "logits/chosen": -2.325190782546997, "logits/rejected": -1.9487972259521484, "logps/chosen": -489.46270751953125, "logps/rejected": -327.79412841796875, "loss": 0.4688, "rewards/accuracies": 0.625, "rewards/chosen": -0.30379363894462585, "rewards/margins": 2.114424705505371, "rewards/rejected": -2.4182186126708984, "step": 8414 }, { "epoch": 0.97, "learning_rate": 9.095165632681727e-09, "logits/chosen": -3.176107406616211, "logits/rejected": -3.487386465072632, "logps/chosen": -135.98165893554688, "logps/rejected": -230.57981872558594, "loss": 0.457, "rewards/accuracies": 0.875, "rewards/chosen": -0.7797970771789551, "rewards/margins": 0.8574131727218628, "rewards/rejected": -1.6372102499008179, "step": 8415 }, { "epoch": 0.97, "learning_rate": 9.060049163057474e-09, "logits/chosen": -3.7591748237609863, "logits/rejected": -3.552121639251709, "logps/chosen": -255.68797302246094, "logps/rejected": -258.18408203125, "loss": 0.3724, "rewards/accuracies": 0.875, "rewards/chosen": -0.25968602299690247, "rewards/margins": 0.9982882738113403, "rewards/rejected": -1.25797438621521, "step": 8416 }, { "epoch": 0.97, "learning_rate": 9.02493269343322e-09, "logits/chosen": -2.9031543731689453, "logits/rejected": -2.9792163372039795, "logps/chosen": -148.63812255859375, "logps/rejected": -240.86782836914062, "loss": 0.4043, "rewards/accuracies": 0.875, "rewards/chosen": 0.38928067684173584, "rewards/margins": 1.7886607646942139, "rewards/rejected": -1.399380087852478, "step": 8417 }, { "epoch": 0.97, "learning_rate": 8.989816223808965e-09, "logits/chosen": -2.9580183029174805, "logits/rejected": -3.0599515438079834, "logps/chosen": -340.7004089355469, "logps/rejected": -224.855224609375, "loss": 0.4378, "rewards/accuracies": 0.875, "rewards/chosen": -0.5318108797073364, "rewards/margins": 1.2630248069763184, "rewards/rejected": -1.7948358058929443, "step": 8418 }, { "epoch": 0.97, "learning_rate": 8.954699754184713e-09, "logits/chosen": -2.60532546043396, "logits/rejected": -2.6548120975494385, "logps/chosen": -206.2239227294922, "logps/rejected": -362.374755859375, "loss": 0.1975, "rewards/accuracies": 1.0, "rewards/chosen": 0.0584198459982872, "rewards/margins": 2.5225110054016113, "rewards/rejected": -2.4640913009643555, "step": 8419 }, { "epoch": 0.97, "learning_rate": 8.919583284560458e-09, "logits/chosen": -3.589961528778076, "logits/rejected": -3.813948392868042, "logps/chosen": -75.11849975585938, "logps/rejected": -268.4548034667969, "loss": 0.2946, "rewards/accuracies": 0.75, "rewards/chosen": 0.05391992628574371, "rewards/margins": 3.491680860519409, "rewards/rejected": -3.437760829925537, "step": 8420 }, { "epoch": 0.97, "learning_rate": 8.884466814936204e-09, "logits/chosen": -2.8860201835632324, "logits/rejected": -3.0575625896453857, "logps/chosen": -190.44007873535156, "logps/rejected": -325.57757568359375, "loss": 0.1628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1732223629951477, "rewards/margins": 2.3098130226135254, "rewards/rejected": -2.4830353260040283, "step": 8421 }, { "epoch": 0.97, "learning_rate": 8.849350345311951e-09, "logits/chosen": -3.5799875259399414, "logits/rejected": -3.436499834060669, "logps/chosen": -357.90582275390625, "logps/rejected": -215.13235473632812, "loss": 0.2429, "rewards/accuracies": 0.875, "rewards/chosen": -0.2094860076904297, "rewards/margins": 2.6277050971984863, "rewards/rejected": -2.837191104888916, "step": 8422 }, { "epoch": 0.97, "learning_rate": 8.814233875687697e-09, "logits/chosen": -3.130901336669922, "logits/rejected": -3.100586414337158, "logps/chosen": -250.59014892578125, "logps/rejected": -249.2540740966797, "loss": 0.5549, "rewards/accuracies": 0.875, "rewards/chosen": 0.2820245623588562, "rewards/margins": 1.9295485019683838, "rewards/rejected": -1.6475238800048828, "step": 8423 }, { "epoch": 0.97, "learning_rate": 8.779117406063444e-09, "logits/chosen": -2.908703327178955, "logits/rejected": -2.729710102081299, "logps/chosen": -170.94032287597656, "logps/rejected": -181.90565490722656, "loss": 0.8448, "rewards/accuracies": 0.625, "rewards/chosen": -0.7636530995368958, "rewards/margins": 0.361050546169281, "rewards/rejected": -1.1247036457061768, "step": 8424 }, { "epoch": 0.97, "learning_rate": 8.74400093643919e-09, "logits/chosen": -3.1324214935302734, "logits/rejected": -3.304983139038086, "logps/chosen": -183.11270141601562, "logps/rejected": -142.7198486328125, "loss": 0.5273, "rewards/accuracies": 0.625, "rewards/chosen": -0.011617705225944519, "rewards/margins": 0.9910141229629517, "rewards/rejected": -1.0026317834854126, "step": 8425 }, { "epoch": 0.97, "learning_rate": 8.708884466814937e-09, "logits/chosen": -3.3177943229675293, "logits/rejected": -3.577139139175415, "logps/chosen": -202.10000610351562, "logps/rejected": -262.4162292480469, "loss": 0.2588, "rewards/accuracies": 0.875, "rewards/chosen": 0.3413122892379761, "rewards/margins": 1.8321030139923096, "rewards/rejected": -1.490790605545044, "step": 8426 }, { "epoch": 0.97, "learning_rate": 8.673767997190682e-09, "logits/chosen": -3.231203556060791, "logits/rejected": -2.677748680114746, "logps/chosen": -658.9725341796875, "logps/rejected": -233.56832885742188, "loss": 0.2511, "rewards/accuracies": 0.875, "rewards/chosen": -0.40233314037323, "rewards/margins": 2.0843820571899414, "rewards/rejected": -2.486715316772461, "step": 8427 }, { "epoch": 0.97, "learning_rate": 8.63865152756643e-09, "logits/chosen": -3.0973427295684814, "logits/rejected": -3.339567184448242, "logps/chosen": -273.0887145996094, "logps/rejected": -286.77691650390625, "loss": 0.4199, "rewards/accuracies": 0.75, "rewards/chosen": -0.3005458414554596, "rewards/margins": 1.4008681774139404, "rewards/rejected": -1.7014141082763672, "step": 8428 }, { "epoch": 0.97, "learning_rate": 8.603535057942175e-09, "logits/chosen": -3.2679266929626465, "logits/rejected": -3.3864011764526367, "logps/chosen": -214.06427001953125, "logps/rejected": -233.89022827148438, "loss": 0.4141, "rewards/accuracies": 0.75, "rewards/chosen": 0.00703786313533783, "rewards/margins": 1.214803695678711, "rewards/rejected": -1.207765817642212, "step": 8429 }, { "epoch": 0.97, "learning_rate": 8.56841858831792e-09, "logits/chosen": -2.726724624633789, "logits/rejected": -3.064340353012085, "logps/chosen": -219.16258239746094, "logps/rejected": -209.36106872558594, "loss": 0.5378, "rewards/accuracies": 0.625, "rewards/chosen": -0.177345871925354, "rewards/margins": 1.3730950355529785, "rewards/rejected": -1.550440788269043, "step": 8430 }, { "epoch": 0.97, "learning_rate": 8.533302118693666e-09, "logits/chosen": -2.5691819190979004, "logits/rejected": -2.700509548187256, "logps/chosen": -228.4883270263672, "logps/rejected": -412.16900634765625, "loss": 0.4476, "rewards/accuracies": 0.625, "rewards/chosen": 0.07041977345943451, "rewards/margins": 2.6801564693450928, "rewards/rejected": -2.609736680984497, "step": 8431 }, { "epoch": 0.97, "learning_rate": 8.498185649069413e-09, "logits/chosen": -2.973529815673828, "logits/rejected": -3.201263904571533, "logps/chosen": -225.58535766601562, "logps/rejected": -221.8105926513672, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": -0.6797542572021484, "rewards/margins": 1.602910041809082, "rewards/rejected": -2.2826645374298096, "step": 8432 }, { "epoch": 0.97, "learning_rate": 8.463069179445159e-09, "logits/chosen": -2.921501636505127, "logits/rejected": -2.9915084838867188, "logps/chosen": -264.0403137207031, "logps/rejected": -285.39642333984375, "loss": 0.435, "rewards/accuracies": 0.75, "rewards/chosen": -0.1676577627658844, "rewards/margins": 1.2559481859207153, "rewards/rejected": -1.4236059188842773, "step": 8433 }, { "epoch": 0.97, "learning_rate": 8.427952709820906e-09, "logits/chosen": -2.9410154819488525, "logits/rejected": -3.211538314819336, "logps/chosen": -238.24871826171875, "logps/rejected": -208.40740966796875, "loss": 0.579, "rewards/accuracies": 0.75, "rewards/chosen": 0.22446183860301971, "rewards/margins": 1.5096815824508667, "rewards/rejected": -1.285219669342041, "step": 8434 }, { "epoch": 0.97, "learning_rate": 8.392836240196652e-09, "logits/chosen": -3.12337589263916, "logits/rejected": -3.382150173187256, "logps/chosen": -326.0976257324219, "logps/rejected": -223.6885986328125, "loss": 0.5221, "rewards/accuracies": 0.875, "rewards/chosen": -0.5976174473762512, "rewards/margins": 1.4886724948883057, "rewards/rejected": -2.086289882659912, "step": 8435 }, { "epoch": 0.97, "learning_rate": 8.357719770572397e-09, "logits/chosen": -3.312103271484375, "logits/rejected": -3.4446029663085938, "logps/chosen": -247.78341674804688, "logps/rejected": -229.12933349609375, "loss": 0.2469, "rewards/accuracies": 0.875, "rewards/chosen": -0.35446399450302124, "rewards/margins": 2.385605812072754, "rewards/rejected": -2.740069627761841, "step": 8436 }, { "epoch": 0.97, "learning_rate": 8.322603300948145e-09, "logits/chosen": -3.286302328109741, "logits/rejected": -3.2838504314422607, "logps/chosen": -275.45904541015625, "logps/rejected": -242.29273986816406, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": -0.5535131692886353, "rewards/margins": 1.8610289096832275, "rewards/rejected": -2.4145419597625732, "step": 8437 }, { "epoch": 0.97, "learning_rate": 8.28748683132389e-09, "logits/chosen": -3.3749213218688965, "logits/rejected": -3.7651848793029785, "logps/chosen": -190.9357147216797, "logps/rejected": -200.5861053466797, "loss": 0.4979, "rewards/accuracies": 0.75, "rewards/chosen": 0.05998539179563522, "rewards/margins": 1.9054255485534668, "rewards/rejected": -1.845440149307251, "step": 8438 }, { "epoch": 0.97, "learning_rate": 8.252370361699637e-09, "logits/chosen": -2.337521553039551, "logits/rejected": -2.308581829071045, "logps/chosen": -168.5311737060547, "logps/rejected": -261.06707763671875, "loss": 0.1753, "rewards/accuracies": 1.0, "rewards/chosen": 0.24522832036018372, "rewards/margins": 2.434825897216797, "rewards/rejected": -2.1895976066589355, "step": 8439 }, { "epoch": 0.97, "learning_rate": 8.217253892075383e-09, "logits/chosen": -3.796539545059204, "logits/rejected": -3.5094943046569824, "logps/chosen": -326.50506591796875, "logps/rejected": -257.970703125, "loss": 0.2766, "rewards/accuracies": 0.875, "rewards/chosen": -0.215492382645607, "rewards/margins": 3.1530280113220215, "rewards/rejected": -3.3685200214385986, "step": 8440 }, { "epoch": 0.97, "learning_rate": 8.18213742245113e-09, "logits/chosen": -3.0618038177490234, "logits/rejected": -2.9919629096984863, "logps/chosen": -177.88943481445312, "logps/rejected": -279.259765625, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": -0.07048708200454712, "rewards/margins": 1.0877712965011597, "rewards/rejected": -1.1582584381103516, "step": 8441 }, { "epoch": 0.97, "learning_rate": 8.147020952826874e-09, "logits/chosen": -3.554231643676758, "logits/rejected": -3.4620437622070312, "logps/chosen": -208.33322143554688, "logps/rejected": -159.91249084472656, "loss": 0.3791, "rewards/accuracies": 0.875, "rewards/chosen": 0.001927502453327179, "rewards/margins": 1.890863299369812, "rewards/rejected": -1.8889358043670654, "step": 8442 }, { "epoch": 0.97, "learning_rate": 8.111904483202621e-09, "logits/chosen": -3.2849061489105225, "logits/rejected": -3.3064053058624268, "logps/chosen": -207.39105224609375, "logps/rejected": -266.3307800292969, "loss": 0.557, "rewards/accuracies": 0.625, "rewards/chosen": -0.773143470287323, "rewards/margins": 0.8364297151565552, "rewards/rejected": -1.6095731258392334, "step": 8443 }, { "epoch": 0.97, "learning_rate": 8.076788013578367e-09, "logits/chosen": -3.5888831615448, "logits/rejected": -3.9313549995422363, "logps/chosen": -455.7917785644531, "logps/rejected": -458.7384338378906, "loss": 0.2775, "rewards/accuracies": 0.875, "rewards/chosen": 0.1357952058315277, "rewards/margins": 3.3609485626220703, "rewards/rejected": -3.225153684616089, "step": 8444 }, { "epoch": 0.97, "learning_rate": 8.041671543954114e-09, "logits/chosen": -3.183074474334717, "logits/rejected": -2.9479448795318604, "logps/chosen": -317.97344970703125, "logps/rejected": -228.6673583984375, "loss": 0.5455, "rewards/accuracies": 0.625, "rewards/chosen": -0.06765228509902954, "rewards/margins": 1.0499480962753296, "rewards/rejected": -1.117600440979004, "step": 8445 }, { "epoch": 0.97, "learning_rate": 8.00655507432986e-09, "logits/chosen": -2.9984371662139893, "logits/rejected": -2.847534418106079, "logps/chosen": -164.04083251953125, "logps/rejected": -257.59918212890625, "loss": 0.4088, "rewards/accuracies": 0.875, "rewards/chosen": -0.5056501030921936, "rewards/margins": 2.45600962638855, "rewards/rejected": -2.9616596698760986, "step": 8446 }, { "epoch": 0.97, "learning_rate": 7.971438604705607e-09, "logits/chosen": -2.8934905529022217, "logits/rejected": -2.8812272548675537, "logps/chosen": -321.766357421875, "logps/rejected": -383.79193115234375, "loss": 0.5697, "rewards/accuracies": 0.75, "rewards/chosen": 0.08661418408155441, "rewards/margins": 1.2753524780273438, "rewards/rejected": -1.1887381076812744, "step": 8447 }, { "epoch": 0.97, "learning_rate": 7.936322135081353e-09, "logits/chosen": -3.2338428497314453, "logits/rejected": -3.505424976348877, "logps/chosen": -286.4169006347656, "logps/rejected": -387.6412658691406, "loss": 0.2951, "rewards/accuracies": 1.0, "rewards/chosen": -0.08707311749458313, "rewards/margins": 1.4228544235229492, "rewards/rejected": -1.5099276304244995, "step": 8448 }, { "epoch": 0.97, "learning_rate": 7.9012056654571e-09, "logits/chosen": -3.4245548248291016, "logits/rejected": -3.18292498588562, "logps/chosen": -176.4376983642578, "logps/rejected": -280.36431884765625, "loss": 0.4635, "rewards/accuracies": 0.875, "rewards/chosen": 0.005509167909622192, "rewards/margins": 1.3783385753631592, "rewards/rejected": -1.3728294372558594, "step": 8449 }, { "epoch": 0.97, "learning_rate": 7.866089195832845e-09, "logits/chosen": -2.9239397048950195, "logits/rejected": -2.795772075653076, "logps/chosen": -475.38507080078125, "logps/rejected": -334.826904296875, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": 0.3018825650215149, "rewards/margins": 3.2029008865356445, "rewards/rejected": -2.9010181427001953, "step": 8450 }, { "epoch": 0.97, "learning_rate": 7.830972726208591e-09, "logits/chosen": -2.3931281566619873, "logits/rejected": -2.514948606491089, "logps/chosen": -253.69070434570312, "logps/rejected": -263.8158264160156, "loss": 0.4294, "rewards/accuracies": 0.875, "rewards/chosen": -0.1243424266576767, "rewards/margins": 1.6393765211105347, "rewards/rejected": -1.763719081878662, "step": 8451 }, { "epoch": 0.97, "learning_rate": 7.795856256584338e-09, "logits/chosen": -3.198887348175049, "logits/rejected": -3.1819355487823486, "logps/chosen": -218.28741455078125, "logps/rejected": -269.8429870605469, "loss": 0.2512, "rewards/accuracies": 0.875, "rewards/chosen": 0.3657569885253906, "rewards/margins": 2.493149995803833, "rewards/rejected": -2.1273932456970215, "step": 8452 }, { "epoch": 0.97, "learning_rate": 7.760739786960084e-09, "logits/chosen": -2.5978691577911377, "logits/rejected": -2.7538676261901855, "logps/chosen": -541.8552856445312, "logps/rejected": -390.70538330078125, "loss": 0.2592, "rewards/accuracies": 1.0, "rewards/chosen": 0.8011832237243652, "rewards/margins": 1.7862989902496338, "rewards/rejected": -0.9851157665252686, "step": 8453 }, { "epoch": 0.97, "learning_rate": 7.72562331733583e-09, "logits/chosen": -2.998096227645874, "logits/rejected": -3.3441169261932373, "logps/chosen": -189.32965087890625, "logps/rejected": -155.8365478515625, "loss": 0.7261, "rewards/accuracies": 0.5, "rewards/chosen": -0.3956030309200287, "rewards/margins": 0.17512786388397217, "rewards/rejected": -0.5707309246063232, "step": 8454 }, { "epoch": 0.97, "learning_rate": 7.690506847711577e-09, "logits/chosen": -2.8574888706207275, "logits/rejected": -2.992537021636963, "logps/chosen": -239.76779174804688, "logps/rejected": -210.01885986328125, "loss": 0.491, "rewards/accuracies": 0.75, "rewards/chosen": -0.5048083066940308, "rewards/margins": 1.7492955923080444, "rewards/rejected": -2.254103899002075, "step": 8455 }, { "epoch": 0.97, "learning_rate": 7.655390378087322e-09, "logits/chosen": -2.875180721282959, "logits/rejected": -2.890360116958618, "logps/chosen": -143.51181030273438, "logps/rejected": -299.0469055175781, "loss": 0.4173, "rewards/accuracies": 0.875, "rewards/chosen": 0.2668341100215912, "rewards/margins": 1.5020010471343994, "rewards/rejected": -1.2351669073104858, "step": 8456 }, { "epoch": 0.97, "learning_rate": 7.620273908463068e-09, "logits/chosen": -3.2421882152557373, "logits/rejected": -3.2816162109375, "logps/chosen": -306.53521728515625, "logps/rejected": -358.46221923828125, "loss": 0.6347, "rewards/accuracies": 0.875, "rewards/chosen": -0.8493460416793823, "rewards/margins": 1.554395079612732, "rewards/rejected": -2.4037411212921143, "step": 8457 }, { "epoch": 0.98, "learning_rate": 7.585157438838815e-09, "logits/chosen": -3.064868450164795, "logits/rejected": -3.13398814201355, "logps/chosen": -304.1175537109375, "logps/rejected": -261.4644775390625, "loss": 0.7485, "rewards/accuracies": 0.625, "rewards/chosen": -0.3816271126270294, "rewards/margins": 0.6329077482223511, "rewards/rejected": -1.014534831047058, "step": 8458 }, { "epoch": 0.98, "learning_rate": 7.55004096921456e-09, "logits/chosen": -4.058652400970459, "logits/rejected": -3.949221134185791, "logps/chosen": -86.2927474975586, "logps/rejected": -95.63933563232422, "loss": 0.2926, "rewards/accuracies": 1.0, "rewards/chosen": 0.479600191116333, "rewards/margins": 1.5495434999465942, "rewards/rejected": -1.0699434280395508, "step": 8459 }, { "epoch": 0.98, "learning_rate": 7.514924499590308e-09, "logits/chosen": -3.4089598655700684, "logits/rejected": -3.026139497756958, "logps/chosen": -326.42108154296875, "logps/rejected": -241.8504180908203, "loss": 1.0241, "rewards/accuracies": 0.625, "rewards/chosen": -0.7534061670303345, "rewards/margins": -0.11472529172897339, "rewards/rejected": -0.6386807560920715, "step": 8460 }, { "epoch": 0.98, "learning_rate": 7.479808029966053e-09, "logits/chosen": -3.539700984954834, "logits/rejected": -3.4401373863220215, "logps/chosen": -120.50401306152344, "logps/rejected": -142.69139099121094, "loss": 0.3613, "rewards/accuracies": 0.875, "rewards/chosen": 0.3258820176124573, "rewards/margins": 1.8816559314727783, "rewards/rejected": -1.5557738542556763, "step": 8461 }, { "epoch": 0.98, "learning_rate": 7.4446915603418e-09, "logits/chosen": -3.336719512939453, "logits/rejected": -3.554978370666504, "logps/chosen": -287.3680725097656, "logps/rejected": -237.15216064453125, "loss": 0.5642, "rewards/accuracies": 0.625, "rewards/chosen": -0.6230790019035339, "rewards/margins": 0.6419951319694519, "rewards/rejected": -1.2650740146636963, "step": 8462 }, { "epoch": 0.98, "learning_rate": 7.409575090717546e-09, "logits/chosen": -2.5466864109039307, "logits/rejected": -2.408594846725464, "logps/chosen": -337.4132995605469, "logps/rejected": -388.8101501464844, "loss": 0.302, "rewards/accuracies": 0.75, "rewards/chosen": 0.6088428497314453, "rewards/margins": 2.242417812347412, "rewards/rejected": -1.6335749626159668, "step": 8463 }, { "epoch": 0.98, "learning_rate": 7.374458621093293e-09, "logits/chosen": -3.092594623565674, "logits/rejected": -3.2774574756622314, "logps/chosen": -153.77410888671875, "logps/rejected": -231.7120361328125, "loss": 0.2599, "rewards/accuracies": 0.875, "rewards/chosen": -0.3202836513519287, "rewards/margins": 1.8681021928787231, "rewards/rejected": -2.1883859634399414, "step": 8464 }, { "epoch": 0.98, "learning_rate": 7.339342151469039e-09, "logits/chosen": -3.6672587394714355, "logits/rejected": -3.6846890449523926, "logps/chosen": -164.27532958984375, "logps/rejected": -221.946044921875, "loss": 0.1642, "rewards/accuracies": 1.0, "rewards/chosen": 0.36008062958717346, "rewards/margins": 2.4470369815826416, "rewards/rejected": -2.086956262588501, "step": 8465 }, { "epoch": 0.98, "learning_rate": 7.304225681844785e-09, "logits/chosen": -3.2541215419769287, "logits/rejected": -3.153207778930664, "logps/chosen": -208.61163330078125, "logps/rejected": -209.39495849609375, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -0.24462464451789856, "rewards/margins": 2.1284637451171875, "rewards/rejected": -2.3730883598327637, "step": 8466 }, { "epoch": 0.98, "learning_rate": 7.269109212220531e-09, "logits/chosen": -3.8739960193634033, "logits/rejected": -3.6803321838378906, "logps/chosen": -323.9363098144531, "logps/rejected": -248.87367248535156, "loss": 0.3554, "rewards/accuracies": 0.75, "rewards/chosen": 0.35727477073669434, "rewards/margins": 2.4737980365753174, "rewards/rejected": -2.116523265838623, "step": 8467 }, { "epoch": 0.98, "learning_rate": 7.233992742596277e-09, "logits/chosen": -2.7860612869262695, "logits/rejected": -2.8490891456604004, "logps/chosen": -497.5242614746094, "logps/rejected": -217.41055297851562, "loss": 0.3788, "rewards/accuracies": 0.875, "rewards/chosen": 0.14588817954063416, "rewards/margins": 1.030962586402893, "rewards/rejected": -0.8850744962692261, "step": 8468 }, { "epoch": 0.98, "learning_rate": 7.198876272972023e-09, "logits/chosen": -3.260657787322998, "logits/rejected": -3.2256827354431152, "logps/chosen": -221.76560974121094, "logps/rejected": -147.77293395996094, "loss": 0.3322, "rewards/accuracies": 0.875, "rewards/chosen": 0.08930650353431702, "rewards/margins": 1.5668606758117676, "rewards/rejected": -1.477554202079773, "step": 8469 }, { "epoch": 0.98, "learning_rate": 7.1637598033477695e-09, "logits/chosen": -2.954310894012451, "logits/rejected": -2.848630428314209, "logps/chosen": -124.78446960449219, "logps/rejected": -193.85659790039062, "loss": 0.129, "rewards/accuracies": 1.0, "rewards/chosen": 0.2611718773841858, "rewards/margins": 2.866994857788086, "rewards/rejected": -2.605823040008545, "step": 8470 }, { "epoch": 0.98, "learning_rate": 7.128643333723516e-09, "logits/chosen": -3.824342966079712, "logits/rejected": -3.4010262489318848, "logps/chosen": -320.6964111328125, "logps/rejected": -244.51654052734375, "loss": 0.5234, "rewards/accuracies": 0.75, "rewards/chosen": -0.02732469141483307, "rewards/margins": 1.2796918153762817, "rewards/rejected": -1.3070164918899536, "step": 8471 }, { "epoch": 0.98, "learning_rate": 7.093526864099262e-09, "logits/chosen": -3.0326199531555176, "logits/rejected": -3.2329461574554443, "logps/chosen": -160.24642944335938, "logps/rejected": -191.69921875, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": -0.21656447649002075, "rewards/margins": 2.095737934112549, "rewards/rejected": -2.312302589416504, "step": 8472 }, { "epoch": 0.98, "learning_rate": 7.058410394475009e-09, "logits/chosen": -3.5692758560180664, "logits/rejected": -3.743993043899536, "logps/chosen": -86.12794494628906, "logps/rejected": -214.58734130859375, "loss": 0.4124, "rewards/accuracies": 0.875, "rewards/chosen": -0.24406638741493225, "rewards/margins": 3.570068836212158, "rewards/rejected": -3.8141355514526367, "step": 8473 }, { "epoch": 0.98, "learning_rate": 7.023293924850755e-09, "logits/chosen": -3.36289381980896, "logits/rejected": -3.529120445251465, "logps/chosen": -241.98193359375, "logps/rejected": -250.06793212890625, "loss": 0.4929, "rewards/accuracies": 0.75, "rewards/chosen": 0.014819830656051636, "rewards/margins": 1.5112448930740356, "rewards/rejected": -1.4964251518249512, "step": 8474 }, { "epoch": 0.98, "learning_rate": 6.9881774552265016e-09, "logits/chosen": -3.2262582778930664, "logits/rejected": -3.4128856658935547, "logps/chosen": -307.86407470703125, "logps/rejected": -314.9330749511719, "loss": 0.4762, "rewards/accuracies": 0.625, "rewards/chosen": -0.16166181862354279, "rewards/margins": 1.8200843334197998, "rewards/rejected": -1.9817461967468262, "step": 8475 }, { "epoch": 0.98, "learning_rate": 6.953060985602247e-09, "logits/chosen": -3.9772093296051025, "logits/rejected": -3.6806013584136963, "logps/chosen": -388.94378662109375, "logps/rejected": -288.0254821777344, "loss": 0.2267, "rewards/accuracies": 0.875, "rewards/chosen": -0.003414541482925415, "rewards/margins": 2.201543092727661, "rewards/rejected": -2.204957962036133, "step": 8476 }, { "epoch": 0.98, "learning_rate": 6.9179445159779936e-09, "logits/chosen": -3.043762683868408, "logits/rejected": -3.070531129837036, "logps/chosen": -578.2501831054688, "logps/rejected": -404.6870422363281, "loss": 0.8866, "rewards/accuracies": 0.5, "rewards/chosen": -1.162374496459961, "rewards/margins": -0.010277248919010162, "rewards/rejected": -1.1520973443984985, "step": 8477 }, { "epoch": 0.98, "learning_rate": 6.882828046353739e-09, "logits/chosen": -3.502081871032715, "logits/rejected": -3.3114190101623535, "logps/chosen": -226.29159545898438, "logps/rejected": -257.4394836425781, "loss": 0.1727, "rewards/accuracies": 1.0, "rewards/chosen": 0.10398821532726288, "rewards/margins": 3.822413921356201, "rewards/rejected": -3.7184255123138428, "step": 8478 }, { "epoch": 0.98, "learning_rate": 6.8477115767294856e-09, "logits/chosen": -3.4955384731292725, "logits/rejected": -3.5288140773773193, "logps/chosen": -267.10162353515625, "logps/rejected": -265.1327209472656, "loss": 0.212, "rewards/accuracies": 0.875, "rewards/chosen": -0.058431342244148254, "rewards/margins": 2.580204725265503, "rewards/rejected": -2.6386358737945557, "step": 8479 }, { "epoch": 0.98, "learning_rate": 6.812595107105232e-09, "logits/chosen": -3.0712790489196777, "logits/rejected": -2.7467620372772217, "logps/chosen": -205.8949432373047, "logps/rejected": -279.7840881347656, "loss": 0.3864, "rewards/accuracies": 0.75, "rewards/chosen": -0.16965949535369873, "rewards/margins": 2.198101043701172, "rewards/rejected": -2.367760419845581, "step": 8480 }, { "epoch": 0.98, "learning_rate": 6.777478637480978e-09, "logits/chosen": -3.021322250366211, "logits/rejected": -3.015963554382324, "logps/chosen": -307.05279541015625, "logps/rejected": -321.27960205078125, "loss": 0.6581, "rewards/accuracies": 0.625, "rewards/chosen": 0.09905458986759186, "rewards/margins": 0.3263217508792877, "rewards/rejected": -0.22726716101169586, "step": 8481 }, { "epoch": 0.98, "learning_rate": 6.742362167856725e-09, "logits/chosen": -3.4139211177825928, "logits/rejected": -2.736502170562744, "logps/chosen": -338.7869567871094, "logps/rejected": -193.4683074951172, "loss": 0.2982, "rewards/accuracies": 0.875, "rewards/chosen": 0.0066890716552734375, "rewards/margins": 1.8478224277496338, "rewards/rejected": -1.8411333560943604, "step": 8482 }, { "epoch": 0.98, "learning_rate": 6.70724569823247e-09, "logits/chosen": -3.256650686264038, "logits/rejected": -3.445671558380127, "logps/chosen": -146.2180633544922, "logps/rejected": -194.78274536132812, "loss": 0.6388, "rewards/accuracies": 0.625, "rewards/chosen": -0.37593984603881836, "rewards/margins": 1.6026803255081177, "rewards/rejected": -1.9786202907562256, "step": 8483 }, { "epoch": 0.98, "learning_rate": 6.672129228608217e-09, "logits/chosen": -3.311394214630127, "logits/rejected": -3.544567584991455, "logps/chosen": -254.2255096435547, "logps/rejected": -222.4074249267578, "loss": 0.5799, "rewards/accuracies": 0.625, "rewards/chosen": -1.0866827964782715, "rewards/margins": 1.1077947616577148, "rewards/rejected": -2.1944775581359863, "step": 8484 }, { "epoch": 0.98, "learning_rate": 6.637012758983963e-09, "logits/chosen": -2.837104320526123, "logits/rejected": -2.44183349609375, "logps/chosen": -400.37689208984375, "logps/rejected": -481.4322814941406, "loss": 0.2455, "rewards/accuracies": 0.875, "rewards/chosen": -0.351169615983963, "rewards/margins": 1.9379820823669434, "rewards/rejected": -2.289151668548584, "step": 8485 }, { "epoch": 0.98, "learning_rate": 6.60189628935971e-09, "logits/chosen": -2.8657331466674805, "logits/rejected": -2.897822856903076, "logps/chosen": -310.7184753417969, "logps/rejected": -251.59860229492188, "loss": 0.5025, "rewards/accuracies": 0.75, "rewards/chosen": -0.15521568059921265, "rewards/margins": 1.4926784038543701, "rewards/rejected": -1.6478941440582275, "step": 8486 }, { "epoch": 0.98, "learning_rate": 6.566779819735456e-09, "logits/chosen": -3.03426456451416, "logits/rejected": -2.9944827556610107, "logps/chosen": -253.21038818359375, "logps/rejected": -169.9788055419922, "loss": 0.3845, "rewards/accuracies": 0.75, "rewards/chosen": -0.032130323350429535, "rewards/margins": 1.5077852010726929, "rewards/rejected": -1.5399155616760254, "step": 8487 }, { "epoch": 0.98, "learning_rate": 6.5316633501112024e-09, "logits/chosen": -2.1996219158172607, "logits/rejected": -2.5172324180603027, "logps/chosen": -405.1337890625, "logps/rejected": -292.83935546875, "loss": 0.2973, "rewards/accuracies": 0.875, "rewards/chosen": -0.006255242973566055, "rewards/margins": 1.2822327613830566, "rewards/rejected": -1.2884879112243652, "step": 8488 }, { "epoch": 0.98, "learning_rate": 6.496546880486949e-09, "logits/chosen": -3.8786821365356445, "logits/rejected": -3.766284704208374, "logps/chosen": -295.08123779296875, "logps/rejected": -263.99896240234375, "loss": 0.2219, "rewards/accuracies": 0.875, "rewards/chosen": -0.6233601570129395, "rewards/margins": 2.8805785179138184, "rewards/rejected": -3.503938674926758, "step": 8489 }, { "epoch": 0.98, "learning_rate": 6.461430410862694e-09, "logits/chosen": -3.084059715270996, "logits/rejected": -3.265758514404297, "logps/chosen": -191.93821716308594, "logps/rejected": -339.48046875, "loss": 0.2274, "rewards/accuracies": 0.875, "rewards/chosen": 0.5367034673690796, "rewards/margins": 4.049424648284912, "rewards/rejected": -3.512721538543701, "step": 8490 }, { "epoch": 0.98, "learning_rate": 6.42631394123844e-09, "logits/chosen": -3.5621612071990967, "logits/rejected": -3.470043420791626, "logps/chosen": -312.8921813964844, "logps/rejected": -308.21453857421875, "loss": 0.5788, "rewards/accuracies": 0.75, "rewards/chosen": -0.032635778188705444, "rewards/margins": 1.9471313953399658, "rewards/rejected": -1.979767084121704, "step": 8491 }, { "epoch": 0.98, "learning_rate": 6.3911974716141864e-09, "logits/chosen": -3.699871063232422, "logits/rejected": -3.568979263305664, "logps/chosen": -184.05386352539062, "logps/rejected": -222.36697387695312, "loss": 0.3516, "rewards/accuracies": 0.875, "rewards/chosen": 0.13250714540481567, "rewards/margins": 2.4029572010040283, "rewards/rejected": -2.2704498767852783, "step": 8492 }, { "epoch": 0.98, "learning_rate": 6.356081001989933e-09, "logits/chosen": -3.044790029525757, "logits/rejected": -2.9673855304718018, "logps/chosen": -375.24090576171875, "logps/rejected": -250.930908203125, "loss": 0.237, "rewards/accuracies": 1.0, "rewards/chosen": -0.012716822326183319, "rewards/margins": 1.8871909379959106, "rewards/rejected": -1.8999075889587402, "step": 8493 }, { "epoch": 0.98, "learning_rate": 6.320964532365679e-09, "logits/chosen": -3.293546676635742, "logits/rejected": -3.1234829425811768, "logps/chosen": -462.0926513671875, "logps/rejected": -400.81756591796875, "loss": 0.1975, "rewards/accuracies": 1.0, "rewards/chosen": -0.4342710077762604, "rewards/margins": 2.308196783065796, "rewards/rejected": -2.7424678802490234, "step": 8494 }, { "epoch": 0.98, "learning_rate": 6.285848062741426e-09, "logits/chosen": -3.0001041889190674, "logits/rejected": -3.493598222732544, "logps/chosen": -205.2208251953125, "logps/rejected": -309.0013732910156, "loss": 0.3268, "rewards/accuracies": 0.875, "rewards/chosen": -0.13551993668079376, "rewards/margins": 2.311723470687866, "rewards/rejected": -2.4472434520721436, "step": 8495 }, { "epoch": 0.98, "learning_rate": 6.250731593117172e-09, "logits/chosen": -2.2359471321105957, "logits/rejected": -2.4334816932678223, "logps/chosen": -203.56834411621094, "logps/rejected": -299.93548583984375, "loss": 0.433, "rewards/accuracies": 0.75, "rewards/chosen": -0.4051065444946289, "rewards/margins": 1.6684437990188599, "rewards/rejected": -2.07354998588562, "step": 8496 }, { "epoch": 0.98, "learning_rate": 6.2156151234929185e-09, "logits/chosen": -3.732530117034912, "logits/rejected": -3.6657090187072754, "logps/chosen": -192.59141540527344, "logps/rejected": -245.81906127929688, "loss": 0.1651, "rewards/accuracies": 1.0, "rewards/chosen": 0.2982720732688904, "rewards/margins": 2.7652266025543213, "rewards/rejected": -2.466954469680786, "step": 8497 }, { "epoch": 0.98, "learning_rate": 6.180498653868664e-09, "logits/chosen": -3.031069278717041, "logits/rejected": -3.448622703552246, "logps/chosen": -195.2930450439453, "logps/rejected": -226.67454528808594, "loss": 0.3733, "rewards/accuracies": 0.75, "rewards/chosen": -0.04790334403514862, "rewards/margins": 2.161766290664673, "rewards/rejected": -2.209669589996338, "step": 8498 }, { "epoch": 0.98, "learning_rate": 6.1453821842444105e-09, "logits/chosen": -3.546642303466797, "logits/rejected": -3.4998908042907715, "logps/chosen": -249.80780029296875, "logps/rejected": -179.9160614013672, "loss": 0.3124, "rewards/accuracies": 0.875, "rewards/chosen": 0.24191740155220032, "rewards/margins": 1.7192165851593018, "rewards/rejected": -1.4772990942001343, "step": 8499 }, { "epoch": 0.98, "learning_rate": 6.110265714620157e-09, "logits/chosen": -3.4534051418304443, "logits/rejected": -3.1558313369750977, "logps/chosen": -163.5133514404297, "logps/rejected": -195.2133331298828, "loss": 1.0196, "rewards/accuracies": 0.5, "rewards/chosen": -0.6240538954734802, "rewards/margins": 0.6159093379974365, "rewards/rejected": -1.2399632930755615, "step": 8500 } ], "logging_steps": 1, "max_steps": 8674, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }