{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4647, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.075268817204301e-09, "logits/chosen": -1.841247797012329, "logits/rejected": -1.8849564790725708, "logps/chosen": -167.1073760986328, "logps/rejected": -149.67556762695312, "loss": 4765.625, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "rewards/safe_rewards": 0.0, "rewards/unsafe_rewards": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0752688172043011e-08, "logits/chosen": -1.6423417329788208, "logits/rejected": -1.8178623914718628, "logps/chosen": -177.31399536132812, "logps/rejected": -141.75772094726562, "loss": 4868.4926, "rewards/accuracies": 0.3680555522441864, "rewards/chosen": 8.103264553938061e-05, "rewards/margins": 8.709639951121062e-05, "rewards/rejected": -6.063799446565099e-06, "rewards/safe_rewards": -7.670136983506382e-05, "rewards/unsafe_rewards": 0.0006762976991012692, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.1505376344086022e-08, "logits/chosen": -1.6758562326431274, "logits/rejected": -1.7978204488754272, "logps/chosen": -176.1974639892578, "logps/rejected": -145.80984497070312, "loss": 5079.857, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.0002164030447602272, "rewards/margins": 0.0006695017218589783, "rewards/rejected": -0.0008859048830345273, "rewards/safe_rewards": -0.00048799518845044076, "rewards/unsafe_rewards": 0.0001096972991945222, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.225806451612903e-08, "logits/chosen": -1.632964849472046, "logits/rejected": -1.802512764930725, "logps/chosen": -181.90237426757812, "logps/rejected": -145.13772583007812, "loss": 4594.9332, "rewards/accuracies": 0.5, "rewards/chosen": -0.0006980699836276472, "rewards/margins": -4.482408985495567e-05, "rewards/rejected": -0.0006532460101880133, "rewards/safe_rewards": -0.0002692498965188861, "rewards/unsafe_rewards": -0.0012058038264513016, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.3010752688172045e-08, "logits/chosen": -1.6477152109146118, "logits/rejected": -1.7816171646118164, "logps/chosen": -181.59109497070312, "logps/rejected": -144.6480255126953, "loss": 5053.9109, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 2.3325381334871054e-05, "rewards/margins": 0.0005525334272533655, "rewards/rejected": -0.0005292078712955117, "rewards/safe_rewards": -9.924652840709314e-05, "rewards/unsafe_rewards": 4.069740043632919e-06, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.3763440860215054e-08, "logits/chosen": -1.6232709884643555, "logits/rejected": -1.7804291248321533, "logps/chosen": -182.57701110839844, "logps/rejected": -150.60208129882812, "loss": 5280.982, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.000384445593226701, "rewards/margins": 0.00015602678467985243, "rewards/rejected": -0.0005404725670814514, "rewards/safe_rewards": -0.0006467961939051747, "rewards/unsafe_rewards": 0.00034715747460722923, "step": 50 }, { "epoch": 0.01, "learning_rate": 6.451612903225806e-08, "logits/chosen": -1.657025933265686, "logits/rejected": -1.753915786743164, "logps/chosen": -179.78451538085938, "logps/rejected": -155.54901123046875, "loss": 4930.4258, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0005824470426887274, "rewards/margins": 0.0002184467448387295, "rewards/rejected": 0.00036400032695382833, "rewards/safe_rewards": 0.0006485666963271797, "rewards/unsafe_rewards": 0.001282638986594975, "step": 60 }, { "epoch": 0.02, "learning_rate": 7.526881720430107e-08, "logits/chosen": -1.6523510217666626, "logits/rejected": -1.8247814178466797, "logps/chosen": -188.77206420898438, "logps/rejected": -150.7940216064453, "loss": 5014.5938, "rewards/accuracies": 0.5, "rewards/chosen": 0.0003752160118892789, "rewards/margins": 0.00034005282213911414, "rewards/rejected": 3.5163131542503834e-05, "rewards/safe_rewards": 0.0004532594757620245, "rewards/unsafe_rewards": 0.0005900125252082944, "step": 70 }, { "epoch": 0.02, "learning_rate": 8.602150537634409e-08, "logits/chosen": -1.621221899986267, "logits/rejected": -1.740878701210022, "logps/chosen": -181.41552734375, "logps/rejected": -157.7497100830078, "loss": 4820.8523, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 5.9171696193516254e-05, "rewards/margins": 0.0004232068022247404, "rewards/rejected": -0.0003640351351350546, "rewards/safe_rewards": -6.192202999955043e-05, "rewards/unsafe_rewards": 0.0005873221671208739, "step": 80 }, { "epoch": 0.02, "learning_rate": 9.677419354838709e-08, "logits/chosen": -1.6190576553344727, "logits/rejected": -1.7482248544692993, "logps/chosen": -182.7442169189453, "logps/rejected": -153.75466918945312, "loss": 4987.3234, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0004818606248591095, "rewards/margins": -0.00032719509908929467, "rewards/rejected": -0.00015466543845832348, "rewards/safe_rewards": -0.000652085233014077, "rewards/unsafe_rewards": -0.0005871877656318247, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.0752688172043011e-07, "logits/chosen": -1.650480031967163, "logits/rejected": -1.8047542572021484, "logps/chosen": -174.82101440429688, "logps/rejected": -148.59976196289062, "loss": 4933.0617, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0007007865933701396, "rewards/margins": 0.0005903373239561915, "rewards/rejected": 0.00011044931306969374, "rewards/safe_rewards": 0.0004853305872529745, "rewards/unsafe_rewards": 0.0007110767182894051, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.1827956989247312e-07, "logits/chosen": -1.6777995824813843, "logits/rejected": -1.766939401626587, "logps/chosen": -170.70761108398438, "logps/rejected": -148.52256774902344, "loss": 4762.4129, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.0006197410402819514, "rewards/margins": 0.0004636083322111517, "rewards/rejected": 0.00015613269351888448, "rewards/safe_rewards": 0.0008179573342204094, "rewards/unsafe_rewards": 0.00011222409375477582, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.2903225806451611e-07, "logits/chosen": -1.6350778341293335, "logits/rejected": -1.7880094051361084, "logps/chosen": -185.482177734375, "logps/rejected": -153.6200714111328, "loss": 4668.05, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -7.413981802528724e-05, "rewards/margins": -0.0004231950733810663, "rewards/rejected": 0.0003490553062874824, "rewards/safe_rewards": -0.00027299358043819666, "rewards/unsafe_rewards": 7.147435212573328e-07, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.3978494623655912e-07, "logits/chosen": -1.700193166732788, "logits/rejected": -1.8245309591293335, "logps/chosen": -173.86248779296875, "logps/rejected": -149.3970947265625, "loss": 4669.384, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0002837815263774246, "rewards/margins": -6.974933785386384e-05, "rewards/rejected": 0.0003535308933351189, "rewards/safe_rewards": 0.00020703506015706807, "rewards/unsafe_rewards": 0.0008100089617073536, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.5053763440860215e-07, "logits/chosen": -1.6460044384002686, "logits/rejected": -1.7600021362304688, "logps/chosen": -175.18763732910156, "logps/rejected": -149.30081176757812, "loss": 5098.3371, "rewards/accuracies": 0.5, "rewards/chosen": -3.965566793340258e-05, "rewards/margins": 7.248640758916736e-05, "rewards/rejected": -0.00011214206460863352, "rewards/safe_rewards": 0.0001699734275462106, "rewards/unsafe_rewards": 0.0002426471037324518, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.6129032258064515e-07, "logits/chosen": -1.6686697006225586, "logits/rejected": -1.7786643505096436, "logps/chosen": -184.40078735351562, "logps/rejected": -157.69442749023438, "loss": 4833.6133, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.000700628268532455, "rewards/margins": 0.0003196108737029135, "rewards/rejected": 0.0003810174239333719, "rewards/safe_rewards": 0.0005840963567607105, "rewards/unsafe_rewards": 0.0008205543272197247, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.7204301075268818e-07, "logits/chosen": -1.667669653892517, "logits/rejected": -1.7461395263671875, "logps/chosen": -170.09548950195312, "logps/rejected": -146.25169372558594, "loss": 4640.9695, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.00011617229029070586, "rewards/margins": 0.0005715423030778766, "rewards/rejected": -0.00045536994002759457, "rewards/safe_rewards": 0.0006092834519222379, "rewards/unsafe_rewards": 0.0005453643389046192, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.8279569892473118e-07, "logits/chosen": -1.610443115234375, "logits/rejected": -1.7399629354476929, "logps/chosen": -178.875244140625, "logps/rejected": -150.24319458007812, "loss": 5295.4172, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0005436358042061329, "rewards/margins": 0.0005223040352575481, "rewards/rejected": 2.1331816242309287e-05, "rewards/safe_rewards": 0.0008945087902247906, "rewards/unsafe_rewards": 0.0011400504736229777, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.9354838709677418e-07, "logits/chosen": -1.6371736526489258, "logits/rejected": -1.7439731359481812, "logps/chosen": -183.94761657714844, "logps/rejected": -154.1298828125, "loss": 4598.5172, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0014514935901388526, "rewards/margins": 0.0010785970371216536, "rewards/rejected": 0.00037289661122485995, "rewards/safe_rewards": 0.0019232084741815925, "rewards/unsafe_rewards": 0.001454713405109942, "step": 180 }, { "epoch": 0.04, "learning_rate": 2.0430107526881721e-07, "logits/chosen": -1.6278107166290283, "logits/rejected": -1.7356603145599365, "logps/chosen": -182.24557495117188, "logps/rejected": -156.81800842285156, "loss": 5144.5426, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 1.4783569895371329e-05, "rewards/margins": -6.826427852502093e-05, "rewards/rejected": 8.304786024382338e-05, "rewards/safe_rewards": 0.00041043470264412463, "rewards/unsafe_rewards": 0.00021105670020915568, "step": 190 }, { "epoch": 0.04, "learning_rate": 2.1505376344086022e-07, "logits/chosen": -1.655765175819397, "logits/rejected": -1.7389318943023682, "logps/chosen": -188.095703125, "logps/rejected": -159.7158203125, "loss": 4402.584, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0007904047961346805, "rewards/margins": 0.00011898848606506363, "rewards/rejected": 0.000671416346449405, "rewards/safe_rewards": 0.0011025696294382215, "rewards/unsafe_rewards": 0.000831533398013562, "step": 200 }, { "epoch": 0.05, "learning_rate": 2.2580645161290322e-07, "logits/chosen": -1.6260929107666016, "logits/rejected": -1.8064696788787842, "logps/chosen": -189.74038696289062, "logps/rejected": -148.06442260742188, "loss": 5035.5453, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0011954107321798801, "rewards/margins": 0.0011411209125071764, "rewards/rejected": 5.4289866966428235e-05, "rewards/safe_rewards": 0.0008738207397982478, "rewards/unsafe_rewards": 0.0012644792441278696, "step": 210 }, { "epoch": 0.05, "learning_rate": 2.3655913978494625e-07, "logits/chosen": -1.6361995935440063, "logits/rejected": -1.7557201385498047, "logps/chosen": -175.93360900878906, "logps/rejected": -143.96009826660156, "loss": 4839.7047, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.0012131560361012816, "rewards/margins": 0.0003900973533745855, "rewards/rejected": 0.0008230588282458484, "rewards/safe_rewards": 0.0006345367291942239, "rewards/unsafe_rewards": 0.000958856544457376, "step": 220 }, { "epoch": 0.05, "learning_rate": 2.473118279569892e-07, "logits/chosen": -1.6708686351776123, "logits/rejected": -1.7433046102523804, "logps/chosen": -180.04342651367188, "logps/rejected": -151.77484130859375, "loss": 5059.5211, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.0019879289902746677, "rewards/margins": 0.001310329302214086, "rewards/rejected": 0.0006775996880605817, "rewards/safe_rewards": 0.0013025322696194053, "rewards/unsafe_rewards": 0.0012676838086917996, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.5806451612903223e-07, "logits/chosen": -1.590618371963501, "logits/rejected": -1.7364962100982666, "logps/chosen": -185.75338745117188, "logps/rejected": -152.9048614501953, "loss": 5165.507, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.0014765148516744375, "rewards/margins": 0.0005922848358750343, "rewards/rejected": 0.0008842298993840814, "rewards/safe_rewards": 0.0018072084058076143, "rewards/unsafe_rewards": 0.0016945044044405222, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.6881720430107523e-07, "logits/chosen": -1.603245496749878, "logits/rejected": -1.719818353652954, "logps/chosen": -190.37606811523438, "logps/rejected": -153.3588409423828, "loss": 5126.1992, "rewards/accuracies": 0.59375, "rewards/chosen": 0.0028493269346654415, "rewards/margins": 0.0013326896587386727, "rewards/rejected": 0.001516637159511447, "rewards/safe_rewards": 0.002289222087711096, "rewards/unsafe_rewards": 0.0023051018361002207, "step": 250 }, { "epoch": 0.06, "learning_rate": 2.7956989247311823e-07, "logits/chosen": -1.6492557525634766, "logits/rejected": -1.7548034191131592, "logps/chosen": -178.9506378173828, "logps/rejected": -153.60031127929688, "loss": 4167.5594, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0027607923839241266, "rewards/margins": 0.0011466488940641284, "rewards/rejected": 0.0016141438391059637, "rewards/safe_rewards": 0.0032476962078362703, "rewards/unsafe_rewards": 0.002969850320369005, "step": 260 }, { "epoch": 0.06, "learning_rate": 2.903225806451613e-07, "logits/chosen": -1.629494071006775, "logits/rejected": -1.7552188634872437, "logps/chosen": -173.79818725585938, "logps/rejected": -148.6545867919922, "loss": 4443.3527, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0029586311429739, "rewards/margins": 0.001045998535118997, "rewards/rejected": 0.0019126326078549027, "rewards/safe_rewards": 0.0028548080008476973, "rewards/unsafe_rewards": 0.0026918419171124697, "step": 270 }, { "epoch": 0.06, "learning_rate": 3.010752688172043e-07, "logits/chosen": -1.6415894031524658, "logits/rejected": -1.7522720098495483, "logps/chosen": -186.55618286132812, "logps/rejected": -152.03936767578125, "loss": 4414.1902, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.0033487025648355484, "rewards/margins": 0.0017903689295053482, "rewards/rejected": 0.0015583334024995565, "rewards/safe_rewards": 0.0033560022711753845, "rewards/unsafe_rewards": 0.0033839803654700518, "step": 280 }, { "epoch": 0.06, "learning_rate": 3.118279569892473e-07, "logits/chosen": -1.636724829673767, "logits/rejected": -1.769683837890625, "logps/chosen": -173.9744415283203, "logps/rejected": -152.8893280029297, "loss": 5109.1273, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.0038763233460485935, "rewards/margins": 0.0012204552767798305, "rewards/rejected": 0.0026558679528534412, "rewards/safe_rewards": 0.004610470030456781, "rewards/unsafe_rewards": 0.0028645608108490705, "step": 290 }, { "epoch": 0.06, "learning_rate": 3.225806451612903e-07, "logits/chosen": -1.6296586990356445, "logits/rejected": -1.780203104019165, "logps/chosen": -182.64984130859375, "logps/rejected": -149.45523071289062, "loss": 5198.9445, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.004301336593925953, "rewards/margins": 0.0017894627526402473, "rewards/rejected": 0.0025118738412857056, "rewards/safe_rewards": 0.003290946129709482, "rewards/unsafe_rewards": 0.003437974024564028, "step": 300 }, { "epoch": 0.07, "learning_rate": 3.333333333333333e-07, "logits/chosen": -1.6517350673675537, "logits/rejected": -1.768850564956665, "logps/chosen": -169.07781982421875, "logps/rejected": -144.01022338867188, "loss": 4964.0891, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.0044082580134272575, "rewards/margins": 0.0017976727103814483, "rewards/rejected": 0.0026105858851224184, "rewards/safe_rewards": 0.003579071955755353, "rewards/unsafe_rewards": 0.004275632090866566, "step": 310 }, { "epoch": 0.07, "learning_rate": 3.4408602150537636e-07, "logits/chosen": -1.6342108249664307, "logits/rejected": -1.7873916625976562, "logps/chosen": -183.35691833496094, "logps/rejected": -150.28756713867188, "loss": 5079.6977, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.005157695151865482, "rewards/margins": 0.0014037080109119415, "rewards/rejected": 0.0037539873737841845, "rewards/safe_rewards": 0.005128889810293913, "rewards/unsafe_rewards": 0.005544352810829878, "step": 320 }, { "epoch": 0.07, "learning_rate": 3.5483870967741936e-07, "logits/chosen": -1.6084163188934326, "logits/rejected": -1.747150182723999, "logps/chosen": -181.58401489257812, "logps/rejected": -150.8043212890625, "loss": 5343.6148, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006452556699514389, "rewards/margins": 0.002416735514998436, "rewards/rejected": 0.004035821184515953, "rewards/safe_rewards": 0.008293990045785904, "rewards/unsafe_rewards": 0.006642967462539673, "step": 330 }, { "epoch": 0.07, "learning_rate": 3.6559139784946236e-07, "logits/chosen": -1.6189358234405518, "logits/rejected": -1.7920001745224, "logps/chosen": -182.53329467773438, "logps/rejected": -149.87557983398438, "loss": 4984.8328, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.006708007305860519, "rewards/margins": 0.002258298685774207, "rewards/rejected": 0.004449709318578243, "rewards/safe_rewards": 0.0067873550578951836, "rewards/unsafe_rewards": 0.006995205767452717, "step": 340 }, { "epoch": 0.08, "learning_rate": 3.7634408602150537e-07, "logits/chosen": -1.635362982749939, "logits/rejected": -1.7557449340820312, "logps/chosen": -181.4214324951172, "logps/rejected": -154.4316864013672, "loss": 5095.7883, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.007162845227867365, "rewards/margins": 0.002463871380314231, "rewards/rejected": 0.00469897361472249, "rewards/safe_rewards": 0.00854283757507801, "rewards/unsafe_rewards": 0.007227322552353144, "step": 350 }, { "epoch": 0.08, "learning_rate": 3.8709677419354837e-07, "logits/chosen": -1.6059119701385498, "logits/rejected": -1.7551063299179077, "logps/chosen": -179.26219177246094, "logps/rejected": -147.4024658203125, "loss": 5196.9688, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.007787021808326244, "rewards/margins": 0.0031800237484276295, "rewards/rejected": 0.004606998525559902, "rewards/safe_rewards": 0.007409608457237482, "rewards/unsafe_rewards": 0.00854805950075388, "step": 360 }, { "epoch": 0.08, "learning_rate": 3.9784946236559137e-07, "logits/chosen": -1.62418532371521, "logits/rejected": -1.7656466960906982, "logps/chosen": -183.9157257080078, "logps/rejected": -155.6136016845703, "loss": 4573.1066, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.008934036828577518, "rewards/margins": 0.0038906086701899767, "rewards/rejected": 0.005043427459895611, "rewards/safe_rewards": 0.009670114144682884, "rewards/unsafe_rewards": 0.010018108412623405, "step": 370 }, { "epoch": 0.08, "learning_rate": 4.0860215053763443e-07, "logits/chosen": -1.6141941547393799, "logits/rejected": -1.7588341236114502, "logps/chosen": -185.079833984375, "logps/rejected": -149.375732421875, "loss": 4786.9766, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.00921496469527483, "rewards/margins": 0.004359695594757795, "rewards/rejected": 0.004855268634855747, "rewards/safe_rewards": 0.009436806663870811, "rewards/unsafe_rewards": 0.008994337171316147, "step": 380 }, { "epoch": 0.08, "learning_rate": 4.1935483870967743e-07, "logits/chosen": -1.6285076141357422, "logits/rejected": -1.781732201576233, "logps/chosen": -179.26141357421875, "logps/rejected": -141.5403289794922, "loss": 4934.1719, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.010095859877765179, "rewards/margins": 0.0052119106985628605, "rewards/rejected": 0.0048839496448636055, "rewards/safe_rewards": 0.008644427172839642, "rewards/unsafe_rewards": 0.010187952779233456, "step": 390 }, { "epoch": 0.09, "learning_rate": 4.3010752688172043e-07, "logits/chosen": -1.6339868307113647, "logits/rejected": -1.7780730724334717, "logps/chosen": -180.1566619873047, "logps/rejected": -147.26113891601562, "loss": 4626.2391, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.011507261544466019, "rewards/margins": 0.004493533633649349, "rewards/rejected": 0.007013729307800531, "rewards/safe_rewards": 0.012333944439888, "rewards/unsafe_rewards": 0.01197083480656147, "step": 400 }, { "epoch": 0.09, "learning_rate": 4.4086021505376344e-07, "logits/chosen": -1.5699560642242432, "logits/rejected": -1.7230558395385742, "logps/chosen": -180.6854248046875, "logps/rejected": -149.8937530517578, "loss": 5042.7809, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.012266886420547962, "rewards/margins": 0.00453173415735364, "rewards/rejected": 0.007735154125839472, "rewards/safe_rewards": 0.011004628613591194, "rewards/unsafe_rewards": 0.011805159039795399, "step": 410 }, { "epoch": 0.09, "learning_rate": 4.5161290322580644e-07, "logits/chosen": -1.578264594078064, "logits/rejected": -1.7353363037109375, "logps/chosen": -174.95321655273438, "logps/rejected": -149.95555114746094, "loss": 5377.0539, "rewards/accuracies": 0.6875, "rewards/chosen": 0.013331146910786629, "rewards/margins": 0.004369753412902355, "rewards/rejected": 0.008961394429206848, "rewards/safe_rewards": 0.012373859994113445, "rewards/unsafe_rewards": 0.014804934151470661, "step": 420 }, { "epoch": 0.09, "learning_rate": 4.6236559139784944e-07, "logits/chosen": -1.5930712223052979, "logits/rejected": -1.776719093322754, "logps/chosen": -184.4582977294922, "logps/rejected": -146.8081512451172, "loss": 4674.5648, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.013796287588775158, "rewards/margins": 0.005463213659822941, "rewards/rejected": 0.008333073928952217, "rewards/safe_rewards": 0.013725158758461475, "rewards/unsafe_rewards": 0.012463893741369247, "step": 430 }, { "epoch": 0.09, "learning_rate": 4.731182795698925e-07, "logits/chosen": -1.596901297569275, "logits/rejected": -1.7505356073379517, "logps/chosen": -180.2940216064453, "logps/rejected": -145.6876983642578, "loss": 4652.3328, "rewards/accuracies": 0.65625, "rewards/chosen": 0.0151588823646307, "rewards/margins": 0.00505469273775816, "rewards/rejected": 0.010104191489517689, "rewards/safe_rewards": 0.014855710789561272, "rewards/unsafe_rewards": 0.01406886987388134, "step": 440 }, { "epoch": 0.1, "learning_rate": 4.838709677419355e-07, "logits/chosen": -1.5907971858978271, "logits/rejected": -1.7229697704315186, "logps/chosen": -179.81771850585938, "logps/rejected": -150.8184356689453, "loss": 4680.3168, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.015589353628456593, "rewards/margins": 0.005617454648017883, "rewards/rejected": 0.00997189711779356, "rewards/safe_rewards": 0.014203856699168682, "rewards/unsafe_rewards": 0.01540838647633791, "step": 450 }, { "epoch": 0.1, "learning_rate": 4.946236559139784e-07, "logits/chosen": -1.5985844135284424, "logits/rejected": -1.7727864980697632, "logps/chosen": -183.23614501953125, "logps/rejected": -148.07064819335938, "loss": 4658.5688, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.0159279927611351, "rewards/margins": 0.005618585739284754, "rewards/rejected": 0.010309405624866486, "rewards/safe_rewards": 0.016207443550229073, "rewards/unsafe_rewards": 0.018266305327415466, "step": 460 }, { "epoch": 0.1, "learning_rate": 4.999982364767102e-07, "logits/chosen": -1.5977035760879517, "logits/rejected": -1.7701795101165771, "logps/chosen": -190.30982971191406, "logps/rejected": -152.16534423828125, "loss": 5082.4906, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.016843190416693687, "rewards/margins": 0.005764626897871494, "rewards/rejected": 0.011078564450144768, "rewards/safe_rewards": 0.017280535772442818, "rewards/unsafe_rewards": 0.01776563748717308, "step": 470 }, { "epoch": 0.1, "learning_rate": 4.99984128439672e-07, "logits/chosen": -1.6032336950302124, "logits/rejected": -1.7363704442977905, "logps/chosen": -181.87295532226562, "logps/rejected": -150.46502685546875, "loss": 5003.4578, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.016057860106229782, "rewards/margins": 0.006917749997228384, "rewards/rejected": 0.009140107780694962, "rewards/safe_rewards": 0.014563520438969135, "rewards/unsafe_rewards": 0.0155752869322896, "step": 480 }, { "epoch": 0.11, "learning_rate": 4.999559131617482e-07, "logits/chosen": -1.64413321018219, "logits/rejected": -1.7672207355499268, "logps/chosen": -175.6224822998047, "logps/rejected": -154.136962890625, "loss": 4943.0668, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.016648801043629646, "rewards/margins": 0.006107917986810207, "rewards/rejected": 0.010540880262851715, "rewards/safe_rewards": 0.015573101118206978, "rewards/unsafe_rewards": 0.01667347177863121, "step": 490 }, { "epoch": 0.11, "learning_rate": 4.999135922351986e-07, "logits/chosen": -1.6067495346069336, "logits/rejected": -1.7534431219100952, "logps/chosen": -180.584228515625, "logps/rejected": -149.09356689453125, "loss": 4842.2766, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.01702262833714485, "rewards/margins": 0.005787975620478392, "rewards/rejected": 0.011234650388360023, "rewards/safe_rewards": 0.015872756019234657, "rewards/unsafe_rewards": 0.014531852677464485, "step": 500 }, { "epoch": 0.11, "eval_logits/chosen": -1.6386401653289795, "eval_logits/rejected": -1.783385992050171, "eval_logps/chosen": -178.1578826904297, "eval_logps/rejected": -147.39076232910156, "eval_loss": 4952.8876953125, "eval_rewards/accuracies": 0.65730881690979, "eval_rewards/chosen": 0.016581717878580093, "eval_rewards/margins": 0.007021909113973379, "eval_rewards/rejected": 0.009559808298945427, "eval_rewards/safe_rewards": 0.016590826213359833, "eval_rewards/unsafe_rewards": 0.016495846211910248, "eval_runtime": 1016.7709, "eval_samples_per_second": 32.499, "eval_steps_per_second": 1.016, "step": 500 }, { "epoch": 0.11, "learning_rate": 4.99857168048301e-07, "logits/chosen": -1.5994396209716797, "logits/rejected": -1.7624841928482056, "logps/chosen": -175.23167419433594, "logps/rejected": -146.50491333007812, "loss": 5033.2133, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.015994837507605553, "rewards/margins": 0.008380202576518059, "rewards/rejected": 0.007614634931087494, "rewards/safe_rewards": 0.013084257021546364, "rewards/unsafe_rewards": 0.015015569515526295, "step": 510 }, { "epoch": 0.11, "learning_rate": 4.997866437852159e-07, "logits/chosen": -1.6289539337158203, "logits/rejected": -1.7155992984771729, "logps/chosen": -178.26248168945312, "logps/rejected": -152.2622833251953, "loss": 4728.9391, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01773754134774208, "rewards/margins": 0.007683499716222286, "rewards/rejected": 0.010054039768874645, "rewards/safe_rewards": 0.01605844870209694, "rewards/unsafe_rewards": 0.016240427270531654, "step": 520 }, { "epoch": 0.11, "learning_rate": 4.997020234258069e-07, "logits/chosen": -1.5814584493637085, "logits/rejected": -1.7459720373153687, "logps/chosen": -176.813720703125, "logps/rejected": -149.86904907226562, "loss": 4517.8047, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.018084099516272545, "rewards/margins": 0.008002784103155136, "rewards/rejected": 0.010081315413117409, "rewards/safe_rewards": 0.020032063126564026, "rewards/unsafe_rewards": 0.02132091112434864, "step": 530 }, { "epoch": 0.12, "learning_rate": 4.996033117454165e-07, "logits/chosen": -1.599437952041626, "logits/rejected": -1.7576488256454468, "logps/chosen": -179.58773803710938, "logps/rejected": -148.10227966308594, "loss": 5068.432, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.016654111444950104, "rewards/margins": 0.006452340632677078, "rewards/rejected": 0.0102017717435956, "rewards/safe_rewards": 0.014227221719920635, "rewards/unsafe_rewards": 0.01512549351900816, "step": 540 }, { "epoch": 0.12, "learning_rate": 4.994905143145961e-07, "logits/chosen": -1.6470963954925537, "logits/rejected": -1.7828181982040405, "logps/chosen": -174.0330352783203, "logps/rejected": -142.59207153320312, "loss": 5066.5559, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.019368816167116165, "rewards/margins": 0.010122648440301418, "rewards/rejected": 0.009246167726814747, "rewards/safe_rewards": 0.022048253566026688, "rewards/unsafe_rewards": 0.02015765570104122, "step": 550 }, { "epoch": 0.12, "learning_rate": 4.993636374987919e-07, "logits/chosen": -1.5742878913879395, "logits/rejected": -1.771630048751831, "logps/chosen": -179.45608520507812, "logps/rejected": -148.90863037109375, "loss": 4562.2719, "rewards/accuracies": 0.71875, "rewards/chosen": 0.019710134714841843, "rewards/margins": 0.010052054189145565, "rewards/rejected": 0.009658079594373703, "rewards/safe_rewards": 0.022419383749365807, "rewards/unsafe_rewards": 0.01770314760506153, "step": 560 }, { "epoch": 0.12, "learning_rate": 4.992226884579858e-07, "logits/chosen": -1.6308406591415405, "logits/rejected": -1.7758678197860718, "logps/chosen": -178.55325317382812, "logps/rejected": -144.25245666503906, "loss": 5224.409, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.01868765987455845, "rewards/margins": 0.010999324731528759, "rewards/rejected": 0.007688331417739391, "rewards/safe_rewards": 0.023108651861548424, "rewards/unsafe_rewards": 0.020929381251335144, "step": 570 }, { "epoch": 0.12, "learning_rate": 4.990676751462909e-07, "logits/chosen": -1.583311915397644, "logits/rejected": -1.7425196170806885, "logps/chosen": -188.9481658935547, "logps/rejected": -156.52073669433594, "loss": 4917.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022281045094132423, "rewards/margins": 0.012441162951290607, "rewards/rejected": 0.009839879348874092, "rewards/safe_rewards": 0.019118523225188255, "rewards/unsafe_rewards": 0.020860351622104645, "step": 580 }, { "epoch": 0.13, "learning_rate": 4.988986063115032e-07, "logits/chosen": -1.5757838487625122, "logits/rejected": -1.7138845920562744, "logps/chosen": -176.46083068847656, "logps/rejected": -151.3673858642578, "loss": 4556.9937, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.017805758863687515, "rewards/margins": 0.006349956151098013, "rewards/rejected": 0.011455804109573364, "rewards/safe_rewards": 0.01728048548102379, "rewards/unsafe_rewards": 0.02088850364089012, "step": 590 }, { "epoch": 0.13, "learning_rate": 4.987154914946075e-07, "logits/chosen": -1.557241678237915, "logits/rejected": -1.688554048538208, "logps/chosen": -176.87094116210938, "logps/rejected": -153.98683166503906, "loss": 4679.2695, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.01771743968129158, "rewards/margins": 0.007517918944358826, "rewards/rejected": 0.010199520736932755, "rewards/safe_rewards": 0.021529849618673325, "rewards/unsafe_rewards": 0.020726583898067474, "step": 600 }, { "epoch": 0.13, "learning_rate": 4.985183410292392e-07, "logits/chosen": -1.5332106351852417, "logits/rejected": -1.7057695388793945, "logps/chosen": -191.968505859375, "logps/rejected": -159.4260711669922, "loss": 4665.6539, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.020592408254742622, "rewards/margins": 0.009875278919935226, "rewards/rejected": 0.010717128403484821, "rewards/safe_rewards": 0.021703725680708885, "rewards/unsafe_rewards": 0.02059812657535076, "step": 610 }, { "epoch": 0.13, "learning_rate": 4.983071660411009e-07, "logits/chosen": -1.6287389993667603, "logits/rejected": -1.7617127895355225, "logps/chosen": -174.3006134033203, "logps/rejected": -145.588134765625, "loss": 4874.1313, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.019616086035966873, "rewards/margins": 0.011748552322387695, "rewards/rejected": 0.007867531850934029, "rewards/safe_rewards": 0.014821596443653107, "rewards/unsafe_rewards": 0.014429745264351368, "step": 620 }, { "epoch": 0.14, "learning_rate": 4.98081978447335e-07, "logits/chosen": -1.5919740200042725, "logits/rejected": -1.7246062755584717, "logps/chosen": -173.9262237548828, "logps/rejected": -148.3386688232422, "loss": 4797.5523, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.018630262464284897, "rewards/margins": 0.0073421066626906395, "rewards/rejected": 0.011288154870271683, "rewards/safe_rewards": 0.02062739059329033, "rewards/unsafe_rewards": 0.020729951560497284, "step": 630 }, { "epoch": 0.14, "learning_rate": 4.978427909558509e-07, "logits/chosen": -1.5789849758148193, "logits/rejected": -1.7101606130599976, "logps/chosen": -178.46669006347656, "logps/rejected": -147.86947631835938, "loss": 5017.7832, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.02059202454984188, "rewards/margins": 0.012809425592422485, "rewards/rejected": 0.007782601751387119, "rewards/safe_rewards": 0.021571138873696327, "rewards/unsafe_rewards": 0.02195223979651928, "step": 640 }, { "epoch": 0.14, "learning_rate": 4.975896170646077e-07, "logits/chosen": -1.5761319398880005, "logits/rejected": -1.7184231281280518, "logps/chosen": -180.64590454101562, "logps/rejected": -154.1398162841797, "loss": 4799.8531, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.01974358782172203, "rewards/margins": 0.00927906297147274, "rewards/rejected": 0.010464522056281567, "rewards/safe_rewards": 0.016188887879252434, "rewards/unsafe_rewards": 0.017980990931391716, "step": 650 }, { "epoch": 0.14, "learning_rate": 4.973224710608525e-07, "logits/chosen": -1.5909719467163086, "logits/rejected": -1.7431719303131104, "logps/chosen": -175.32537841796875, "logps/rejected": -146.88858032226562, "loss": 4494.7316, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01952921412885189, "rewards/margins": 0.009676951915025711, "rewards/rejected": 0.009852261282503605, "rewards/safe_rewards": 0.018570493906736374, "rewards/unsafe_rewards": 0.019842050969600677, "step": 660 }, { "epoch": 0.14, "learning_rate": 4.970413680203148e-07, "logits/chosen": -1.6043914556503296, "logits/rejected": -1.7442528009414673, "logps/chosen": -171.8334197998047, "logps/rejected": -144.21194458007812, "loss": 4754.5383, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.01886538416147232, "rewards/margins": 0.011525439098477364, "rewards/rejected": 0.007339946925640106, "rewards/safe_rewards": 0.01839594542980194, "rewards/unsafe_rewards": 0.020214399322867393, "step": 670 }, { "epoch": 0.15, "learning_rate": 4.967463238063549e-07, "logits/chosen": -1.5533111095428467, "logits/rejected": -1.708595871925354, "logps/chosen": -178.91592407226562, "logps/rejected": -146.70932006835938, "loss": 4855.3984, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.019317051395773888, "rewards/margins": 0.014288688078522682, "rewards/rejected": 0.005028365179896355, "rewards/safe_rewards": 0.016483349725604057, "rewards/unsafe_rewards": 0.018739691004157066, "step": 680 }, { "epoch": 0.15, "learning_rate": 4.964373550690689e-07, "logits/chosen": -1.5141321420669556, "logits/rejected": -1.7128736972808838, "logps/chosen": -179.57386779785156, "logps/rejected": -144.0908966064453, "loss": 4657.5762, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.019877474755048752, "rewards/margins": 0.014471826143562794, "rewards/rejected": 0.005405646748840809, "rewards/safe_rewards": 0.021987130865454674, "rewards/unsafe_rewards": 0.023862585425376892, "step": 690 }, { "epoch": 0.15, "learning_rate": 4.961144792443493e-07, "logits/chosen": -1.5597556829452515, "logits/rejected": -1.7348425388336182, "logps/chosen": -177.72216796875, "logps/rejected": -143.12265014648438, "loss": 4872.6426, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.01977689564228058, "rewards/margins": 0.014712458476424217, "rewards/rejected": 0.005064436700195074, "rewards/safe_rewards": 0.021701151505112648, "rewards/unsafe_rewards": 0.020902257412672043, "step": 700 }, { "epoch": 0.15, "learning_rate": 4.957777145529013e-07, "logits/chosen": -1.5513734817504883, "logits/rejected": -1.681553840637207, "logps/chosen": -173.74038696289062, "logps/rejected": -149.1848907470703, "loss": 5115.968, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.020674163475632668, "rewards/margins": 0.010721264407038689, "rewards/rejected": 0.009952899999916553, "rewards/safe_rewards": 0.020846998319029808, "rewards/unsafe_rewards": 0.020744290202856064, "step": 710 }, { "epoch": 0.15, "learning_rate": 4.954270799992138e-07, "logits/chosen": -1.5741864442825317, "logits/rejected": -1.7159373760223389, "logps/chosen": -180.89617919921875, "logps/rejected": -153.37704467773438, "loss": 4756.9391, "rewards/accuracies": 0.625, "rewards/chosen": 0.018656719475984573, "rewards/margins": 0.012022222392261028, "rewards/rejected": 0.006634497083723545, "rewards/safe_rewards": 0.022019393742084503, "rewards/unsafe_rewards": 0.020668350160121918, "step": 720 }, { "epoch": 0.16, "learning_rate": 4.950625953704872e-07, "logits/chosen": -1.5495109558105469, "logits/rejected": -1.7143739461898804, "logps/chosen": -177.4563446044922, "logps/rejected": -143.67263793945312, "loss": 4430.4324, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.017520634457468987, "rewards/margins": 0.015744315460324287, "rewards/rejected": 0.0017763193463906646, "rewards/safe_rewards": 0.01495235227048397, "rewards/unsafe_rewards": 0.01728275790810585, "step": 730 }, { "epoch": 0.16, "learning_rate": 4.946842812355176e-07, "logits/chosen": -1.5130354166030884, "logits/rejected": -1.6808258295059204, "logps/chosen": -183.3751220703125, "logps/rejected": -154.5149383544922, "loss": 4772.2969, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.021085545420646667, "rewards/margins": 0.014290817081928253, "rewards/rejected": 0.006794729735702276, "rewards/safe_rewards": 0.017337966710329056, "rewards/unsafe_rewards": 0.026349077001214027, "step": 740 }, { "epoch": 0.16, "learning_rate": 4.942921589435345e-07, "logits/chosen": -1.5846855640411377, "logits/rejected": -1.7043269872665405, "logps/chosen": -182.97332763671875, "logps/rejected": -156.40847778320312, "loss": 4783.4145, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.022515857592225075, "rewards/margins": 0.011224842630326748, "rewards/rejected": 0.011291015893220901, "rewards/safe_rewards": 0.025791119784116745, "rewards/unsafe_rewards": 0.022067410871386528, "step": 750 }, { "epoch": 0.16, "learning_rate": 4.938862506229975e-07, "logits/chosen": -1.5550178289413452, "logits/rejected": -1.7574552297592163, "logps/chosen": -185.92514038085938, "logps/rejected": -144.5548553466797, "loss": 4596.8117, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.017119523137807846, "rewards/margins": 0.01760021224617958, "rewards/rejected": -0.00048068788601085544, "rewards/safe_rewards": 0.009329085238277912, "rewards/unsafe_rewards": 0.012996772304177284, "step": 760 }, { "epoch": 0.17, "learning_rate": 4.934665791803467e-07, "logits/chosen": -1.627903938293457, "logits/rejected": -1.7207536697387695, "logps/chosen": -174.9581298828125, "logps/rejected": -145.29373168945312, "loss": 5018.8313, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.016825996339321136, "rewards/margins": 0.016322974115610123, "rewards/rejected": 0.0005030218744650483, "rewards/safe_rewards": 0.014943329617381096, "rewards/unsafe_rewards": 0.01841549761593342, "step": 770 }, { "epoch": 0.17, "learning_rate": 4.9303316829871e-07, "logits/chosen": -1.5738252401351929, "logits/rejected": -1.6971334218978882, "logps/chosen": -174.09475708007812, "logps/rejected": -145.15338134765625, "loss": 5153.2844, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.019816061481833458, "rewards/margins": 0.016739143058657646, "rewards/rejected": 0.0030769193544983864, "rewards/safe_rewards": 0.021965594962239265, "rewards/unsafe_rewards": 0.022268492728471756, "step": 780 }, { "epoch": 0.17, "learning_rate": 4.925860424365672e-07, "logits/chosen": -1.505282998085022, "logits/rejected": -1.7062864303588867, "logps/chosen": -185.49606323242188, "logps/rejected": -148.89646911621094, "loss": 5025.6984, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.021019628271460533, "rewards/margins": 0.02147402986884117, "rewards/rejected": -0.0004543992690742016, "rewards/safe_rewards": 0.024632267653942108, "rewards/unsafe_rewards": 0.021677622571587563, "step": 790 }, { "epoch": 0.17, "learning_rate": 4.92125226826369e-07, "logits/chosen": -1.5427778959274292, "logits/rejected": -1.6362760066986084, "logps/chosen": -173.67210388183594, "logps/rejected": -154.96466064453125, "loss": 4954.359, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.01242048479616642, "rewards/margins": 0.008571824058890343, "rewards/rejected": 0.0038486614357680082, "rewards/safe_rewards": 0.013557620346546173, "rewards/unsafe_rewards": 0.013820466585457325, "step": 800 }, { "epoch": 0.17, "learning_rate": 4.916507474731139e-07, "logits/chosen": -1.5645537376403809, "logits/rejected": -1.6929397583007812, "logps/chosen": -177.75399780273438, "logps/rejected": -148.66651916503906, "loss": 4493.1648, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.012925478629767895, "rewards/margins": 0.012870279140770435, "rewards/rejected": 5.5200747738126665e-05, "rewards/safe_rewards": 0.013835974037647247, "rewards/unsafe_rewards": 0.016442596912384033, "step": 810 }, { "epoch": 0.18, "learning_rate": 4.911626311528797e-07, "logits/chosen": -1.5617138147354126, "logits/rejected": -1.725114107131958, "logps/chosen": -176.05545043945312, "logps/rejected": -147.82937622070312, "loss": 4702.2824, "rewards/accuracies": 0.6875, "rewards/chosen": 0.010513778775930405, "rewards/margins": 0.013840693049132824, "rewards/rejected": -0.003326915670186281, "rewards/safe_rewards": 0.015327843837440014, "rewards/unsafe_rewards": 0.0110200559720397, "step": 820 }, { "epoch": 0.18, "learning_rate": 4.906609054113132e-07, "logits/chosen": -1.575339674949646, "logits/rejected": -1.715907096862793, "logps/chosen": -178.47650146484375, "logps/rejected": -149.91897583007812, "loss": 4637.7875, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.014935845509171486, "rewards/margins": 0.021757211536169052, "rewards/rejected": -0.006821366958320141, "rewards/safe_rewards": 0.01859288103878498, "rewards/unsafe_rewards": 0.015987958759069443, "step": 830 }, { "epoch": 0.18, "learning_rate": 4.901455985620758e-07, "logits/chosen": -1.5562220811843872, "logits/rejected": -1.6908047199249268, "logps/chosen": -170.28060913085938, "logps/rejected": -146.63534545898438, "loss": 5314.6797, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.008310581557452679, "rewards/margins": 0.012483244761824608, "rewards/rejected": -0.004172663204371929, "rewards/safe_rewards": 0.006303855683654547, "rewards/unsafe_rewards": 0.007961801253259182, "step": 840 }, { "epoch": 0.18, "learning_rate": 4.896167396852448e-07, "logits/chosen": -1.5469985008239746, "logits/rejected": -1.6732521057128906, "logps/chosen": -185.25784301757812, "logps/rejected": -156.919189453125, "loss": 4579.0203, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01226478535681963, "rewards/margins": 0.017367804422974586, "rewards/rejected": -0.005103019531816244, "rewards/safe_rewards": 0.014918260276317596, "rewards/unsafe_rewards": 0.01712292991578579, "step": 850 }, { "epoch": 0.19, "learning_rate": 4.890743586256732e-07, "logits/chosen": -1.5472127199172974, "logits/rejected": -1.7394263744354248, "logps/chosen": -186.6341094970703, "logps/rejected": -150.58648681640625, "loss": 4443.3844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.010335646569728851, "rewards/margins": 0.017659544944763184, "rewards/rejected": -0.007323897443711758, "rewards/safe_rewards": 0.011138361878693104, "rewards/unsafe_rewards": 0.010932001285254955, "step": 860 }, { "epoch": 0.19, "learning_rate": 4.885184859913055e-07, "logits/chosen": -1.5702755451202393, "logits/rejected": -1.682360053062439, "logps/chosen": -178.3818359375, "logps/rejected": -148.6984100341797, "loss": 5049.8355, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.010441366583108902, "rewards/margins": 0.016061976552009583, "rewards/rejected": -0.0056206113658845425, "rewards/safe_rewards": 0.008375110104680061, "rewards/unsafe_rewards": 0.005646648816764355, "step": 870 }, { "epoch": 0.19, "learning_rate": 4.879491531514496e-07, "logits/chosen": -1.5613380670547485, "logits/rejected": -1.7039859294891357, "logps/chosen": -181.9600372314453, "logps/rejected": -151.79727172851562, "loss": 4732.7656, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.007948925718665123, "rewards/margins": 0.016761984676122665, "rewards/rejected": -0.008813057094812393, "rewards/safe_rewards": 0.005481810308992863, "rewards/unsafe_rewards": 0.006593714468181133, "step": 880 }, { "epoch": 0.19, "learning_rate": 4.873663922350073e-07, "logits/chosen": -1.4946448802947998, "logits/rejected": -1.6577112674713135, "logps/chosen": -186.999267578125, "logps/rejected": -151.83328247070312, "loss": 4417.8578, "rewards/accuracies": 0.59375, "rewards/chosen": 0.008463570848107338, "rewards/margins": 0.015700601041316986, "rewards/rejected": -0.0072370306588709354, "rewards/safe_rewards": 0.005575391463935375, "rewards/unsafe_rewards": 0.0033403397537767887, "step": 890 }, { "epoch": 0.19, "learning_rate": 4.867702361286611e-07, "logits/chosen": -1.5400947332382202, "logits/rejected": -1.694443941116333, "logps/chosen": -183.21932983398438, "logps/rejected": -153.85418701171875, "loss": 4936.0352, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0057777403853833675, "rewards/margins": 0.015826528891921043, "rewards/rejected": -0.010048788972198963, "rewards/safe_rewards": -0.0001579704403411597, "rewards/unsafe_rewards": 0.012007265351712704, "step": 900 }, { "epoch": 0.2, "learning_rate": 4.86160718475018e-07, "logits/chosen": -1.5794130563735962, "logits/rejected": -1.7272552251815796, "logps/chosen": -178.5189208984375, "logps/rejected": -148.9539337158203, "loss": 4596.1961, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.006363728549331427, "rewards/margins": 0.019010739400982857, "rewards/rejected": -0.012647010385990143, "rewards/safe_rewards": 0.008845261298120022, "rewards/unsafe_rewards": 0.0027075479738414288, "step": 910 }, { "epoch": 0.2, "learning_rate": 4.855378736707111e-07, "logits/chosen": -1.5744515657424927, "logits/rejected": -1.692866563796997, "logps/chosen": -179.04721069335938, "logps/rejected": -158.2822265625, "loss": 4980.1816, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.0025621221866458654, "rewards/margins": 0.012630835175514221, "rewards/rejected": -0.010068711824715137, "rewards/safe_rewards": 0.004345524590462446, "rewards/unsafe_rewards": 0.00727293873205781, "step": 920 }, { "epoch": 0.2, "learning_rate": 4.849017368644587e-07, "logits/chosen": -1.579117774963379, "logits/rejected": -1.7048050165176392, "logps/chosen": -164.6121826171875, "logps/rejected": -146.69967651367188, "loss": 4624.9137, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0043413350358605385, "rewards/margins": 0.011375433765351772, "rewards/rejected": -0.01571676880121231, "rewards/safe_rewards": -0.008270684629678726, "rewards/unsafe_rewards": -0.005296608898788691, "step": 930 }, { "epoch": 0.2, "learning_rate": 4.842523439550805e-07, "logits/chosen": -1.5411508083343506, "logits/rejected": -1.674420714378357, "logps/chosen": -176.6567840576172, "logps/rejected": -150.55056762695312, "loss": 4988.2297, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0004101383383385837, "rewards/margins": 0.018538275733590126, "rewards/rejected": -0.01894841529428959, "rewards/safe_rewards": 0.00042616072460077703, "rewards/unsafe_rewards": 0.0020253141410648823, "step": 940 }, { "epoch": 0.2, "learning_rate": 4.835897315894717e-07, "logits/chosen": -1.527677297592163, "logits/rejected": -1.6546592712402344, "logps/chosen": -178.12094116210938, "logps/rejected": -151.09634399414062, "loss": 4667.7453, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.002617661142721772, "rewards/margins": 0.015693780034780502, "rewards/rejected": -0.01831144094467163, "rewards/safe_rewards": -0.005162347108125687, "rewards/unsafe_rewards": -0.003865959122776985, "step": 950 }, { "epoch": 0.21, "learning_rate": 4.829139371605355e-07, "logits/chosen": -1.5587852001190186, "logits/rejected": -1.6493736505508423, "logps/chosen": -180.42642211914062, "logps/rejected": -153.95884704589844, "loss": 4622.9918, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.005466217640787363, "rewards/margins": 0.017337221652269363, "rewards/rejected": -0.02280344069004059, "rewards/safe_rewards": -0.01242448017001152, "rewards/unsafe_rewards": -0.007321304641664028, "step": 960 }, { "epoch": 0.21, "learning_rate": 4.822249988050722e-07, "logits/chosen": -1.5443377494812012, "logits/rejected": -1.6500215530395508, "logps/chosen": -180.7191925048828, "logps/rejected": -155.9000701904297, "loss": 4440.5426, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.009531332179903984, "rewards/margins": 0.013552132062613964, "rewards/rejected": -0.023083463311195374, "rewards/safe_rewards": -0.012513317167758942, "rewards/unsafe_rewards": -0.013293862342834473, "step": 970 }, { "epoch": 0.21, "learning_rate": 4.815229554016274e-07, "logits/chosen": -1.5614014863967896, "logits/rejected": -1.6905111074447632, "logps/chosen": -182.5176544189453, "logps/rejected": -153.37161254882812, "loss": 4631.0988, "rewards/accuracies": 0.65625, "rewards/chosen": -0.006219635717570782, "rewards/margins": 0.01782199740409851, "rewards/rejected": -0.024041632190346718, "rewards/safe_rewards": -0.007816949859261513, "rewards/unsafe_rewards": -0.009216772392392159, "step": 980 }, { "epoch": 0.21, "learning_rate": 4.808078465682978e-07, "logits/chosen": -1.5036462545394897, "logits/rejected": -1.6800428628921509, "logps/chosen": -188.84451293945312, "logps/rejected": -149.65478515625, "loss": 4406.732, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.003791085910052061, "rewards/margins": 0.02408907748758793, "rewards/rejected": -0.027880162000656128, "rewards/safe_rewards": -0.006850613746792078, "rewards/unsafe_rewards": -0.007847568951547146, "step": 990 }, { "epoch": 0.22, "learning_rate": 4.800797126604958e-07, "logits/chosen": -1.5296428203582764, "logits/rejected": -1.744649887084961, "logps/chosen": -180.00326538085938, "logps/rejected": -148.6876220703125, "loss": 4764.3852, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.00831857044249773, "rewards/margins": 0.021949246525764465, "rewards/rejected": -0.03026781603693962, "rewards/safe_rewards": -0.008913389407098293, "rewards/unsafe_rewards": -0.004927180241793394, "step": 1000 }, { "epoch": 0.22, "eval_logits/chosen": -1.5780428647994995, "eval_logits/rejected": -1.72810959815979, "eval_logps/chosen": -180.80209350585938, "eval_logps/rejected": -151.17010498046875, "eval_loss": 4865.9208984375, "eval_rewards/accuracies": 0.6644482016563416, "eval_rewards/chosen": -0.009860348887741566, "eval_rewards/margins": 0.018373312428593636, "eval_rewards/rejected": -0.028233664110302925, "eval_rewards/safe_rewards": -0.009374169632792473, "eval_rewards/unsafe_rewards": -0.009836599230766296, "eval_runtime": 1088.8772, "eval_samples_per_second": 30.347, "eval_steps_per_second": 0.949, "step": 1000 }, { "epoch": 0.22, "learning_rate": 4.793385947686718e-07, "logits/chosen": -1.5510352849960327, "logits/rejected": -1.6964666843414307, "logps/chosen": -169.26101684570312, "logps/rejected": -143.75929260253906, "loss": 4900.8133, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.012449303641915321, "rewards/margins": 0.022763162851333618, "rewards/rejected": -0.03521246835589409, "rewards/safe_rewards": -0.013376560993492603, "rewards/unsafe_rewards": -0.01788097806274891, "step": 1010 }, { "epoch": 0.22, "learning_rate": 4.785845347159957e-07, "logits/chosen": -1.525052547454834, "logits/rejected": -1.6403815746307373, "logps/chosen": -176.87310791015625, "logps/rejected": -150.3484649658203, "loss": 4347.1016, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.013031141832470894, "rewards/margins": 0.019700592383742332, "rewards/rejected": -0.032731734216213226, "rewards/safe_rewards": -0.012211853638291359, "rewards/unsafe_rewards": -0.0182808730751276, "step": 1020 }, { "epoch": 0.22, "learning_rate": 4.778175750559959e-07, "logits/chosen": -1.5821510553359985, "logits/rejected": -1.6693662405014038, "logps/chosen": -177.48016357421875, "logps/rejected": -157.25814819335938, "loss": 4977.8695, "rewards/accuracies": 0.625, "rewards/chosen": -0.01617497019469738, "rewards/margins": 0.012112481519579887, "rewards/rejected": -0.028287451714277267, "rewards/safe_rewards": -0.018622705712914467, "rewards/unsafe_rewards": -0.011961039155721664, "step": 1030 }, { "epoch": 0.22, "learning_rate": 4.770377590701591e-07, "logits/chosen": -1.5436042547225952, "logits/rejected": -1.6797033548355103, "logps/chosen": -180.53854370117188, "logps/rejected": -154.8354949951172, "loss": 4585.5445, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.016166018322110176, "rewards/margins": 0.024707483127713203, "rewards/rejected": -0.04087350144982338, "rewards/safe_rewards": -0.011879777535796165, "rewards/unsafe_rewards": -0.013324739411473274, "step": 1040 }, { "epoch": 0.23, "learning_rate": 4.762451307654869e-07, "logits/chosen": -1.524965524673462, "logits/rejected": -1.6856762170791626, "logps/chosen": -185.17910766601562, "logps/rejected": -149.2259063720703, "loss": 5309.7898, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0158572755753994, "rewards/margins": 0.02683008648455143, "rewards/rejected": -0.04268736019730568, "rewards/safe_rewards": -0.008610779419541359, "rewards/unsafe_rewards": -0.013669935055077076, "step": 1050 }, { "epoch": 0.23, "learning_rate": 4.754397348720128e-07, "logits/chosen": -1.4999595880508423, "logits/rejected": -1.6782264709472656, "logps/chosen": -190.922119140625, "logps/rejected": -158.21499633789062, "loss": 4652.4633, "rewards/accuracies": 0.71875, "rewards/chosen": -0.013646647334098816, "rewards/margins": 0.02510169707238674, "rewards/rejected": -0.03874834254384041, "rewards/safe_rewards": -0.01882224716246128, "rewards/unsafe_rewards": -0.010786881670355797, "step": 1060 }, { "epoch": 0.23, "learning_rate": 4.7462161684027784e-07, "logits/chosen": -1.5209945440292358, "logits/rejected": -1.6590080261230469, "logps/chosen": -191.70245361328125, "logps/rejected": -158.4144287109375, "loss": 4497.9355, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009654097259044647, "rewards/margins": 0.020407911390066147, "rewards/rejected": -0.030062008649110794, "rewards/safe_rewards": -0.009215233847498894, "rewards/unsafe_rewards": -0.006583905313163996, "step": 1070 }, { "epoch": 0.23, "learning_rate": 4.737908228387656e-07, "logits/chosen": -1.5398730039596558, "logits/rejected": -1.7210054397583008, "logps/chosen": -174.3080596923828, "logps/rejected": -146.53610229492188, "loss": 4962.5883, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.023135431110858917, "rewards/margins": 0.02432725578546524, "rewards/rejected": -0.04746268317103386, "rewards/safe_rewards": -0.026932084932923317, "rewards/unsafe_rewards": -0.0225512757897377, "step": 1080 }, { "epoch": 0.23, "learning_rate": 4.7294739975129694e-07, "logits/chosen": -1.4983981847763062, "logits/rejected": -1.6769548654556274, "logps/chosen": -187.14663696289062, "logps/rejected": -152.4070587158203, "loss": 4656.8148, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.022311409935355186, "rewards/margins": 0.025456145405769348, "rewards/rejected": -0.047767557203769684, "rewards/safe_rewards": -0.020693659782409668, "rewards/unsafe_rewards": -0.021715760231018066, "step": 1090 }, { "epoch": 0.24, "learning_rate": 4.7209139517438425e-07, "logits/chosen": -1.505804419517517, "logits/rejected": -1.6296733617782593, "logps/chosen": -185.84036254882812, "logps/rejected": -162.27676391601562, "loss": 4702.3539, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.027340754866600037, "rewards/margins": 0.016213547438383102, "rewards/rejected": -0.04355430230498314, "rewards/safe_rewards": -0.02248484268784523, "rewards/unsafe_rewards": -0.024717839434742928, "step": 1100 }, { "epoch": 0.24, "learning_rate": 4.712228574145455e-07, "logits/chosen": -1.5047978162765503, "logits/rejected": -1.6634480953216553, "logps/chosen": -199.08847045898438, "logps/rejected": -161.100341796875, "loss": 4635.9949, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.022254398092627525, "rewards/margins": 0.026274647563695908, "rewards/rejected": -0.048529043793678284, "rewards/safe_rewards": -0.02378007397055626, "rewards/unsafe_rewards": -0.021826177835464478, "step": 1110 }, { "epoch": 0.24, "learning_rate": 4.7034183548557784e-07, "logits/chosen": -1.5533239841461182, "logits/rejected": -1.7166436910629272, "logps/chosen": -190.49130249023438, "logps/rejected": -153.79519653320312, "loss": 4790.7055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.015208068303763866, "rewards/margins": 0.029561270028352737, "rewards/rejected": -0.04476933926343918, "rewards/safe_rewards": -0.017473068088293076, "rewards/unsafe_rewards": -0.027310604229569435, "step": 1120 }, { "epoch": 0.24, "learning_rate": 4.694483791057921e-07, "logits/chosen": -1.4894449710845947, "logits/rejected": -1.5994068384170532, "logps/chosen": -179.13082885742188, "logps/rejected": -154.52647399902344, "loss": 5130.4695, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03587505593895912, "rewards/margins": 0.015189135447144508, "rewards/rejected": -0.05106419324874878, "rewards/safe_rewards": -0.03542325645685196, "rewards/unsafe_rewards": -0.04260321706533432, "step": 1130 }, { "epoch": 0.25, "learning_rate": 4.685425386952067e-07, "logits/chosen": -1.5252753496170044, "logits/rejected": -1.6408637762069702, "logps/chosen": -181.5166778564453, "logps/rejected": -152.76333618164062, "loss": 4573.6117, "rewards/accuracies": 0.625, "rewards/chosen": -0.030868560075759888, "rewards/margins": 0.017154265195131302, "rewards/rejected": -0.04802282527089119, "rewards/safe_rewards": -0.03166314959526062, "rewards/unsafe_rewards": -0.02820347249507904, "step": 1140 }, { "epoch": 0.25, "learning_rate": 4.6762436537270255e-07, "logits/chosen": -1.520483374595642, "logits/rejected": -1.6454414129257202, "logps/chosen": -187.6238250732422, "logps/rejected": -155.3218536376953, "loss": 4589.9258, "rewards/accuracies": 0.71875, "rewards/chosen": -0.025034060701727867, "rewards/margins": 0.02644175849854946, "rewards/rejected": -0.05147582292556763, "rewards/safe_rewards": -0.029457172378897667, "rewards/unsafe_rewards": -0.02147197350859642, "step": 1150 }, { "epoch": 0.25, "learning_rate": 4.66693910953138e-07, "logits/chosen": -1.5161839723587036, "logits/rejected": -1.6767972707748413, "logps/chosen": -185.7211456298828, "logps/rejected": -155.84197998046875, "loss": 4689.823, "rewards/accuracies": 0.6875, "rewards/chosen": -0.032916225492954254, "rewards/margins": 0.025957966223359108, "rewards/rejected": -0.05887419730424881, "rewards/safe_rewards": -0.0387568399310112, "rewards/unsafe_rewards": -0.03600591421127319, "step": 1160 }, { "epoch": 0.25, "learning_rate": 4.657512279444252e-07, "logits/chosen": -1.518432378768921, "logits/rejected": -1.6462745666503906, "logps/chosen": -181.6901397705078, "logps/rejected": -157.07095336914062, "loss": 4350.3531, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.03224266320466995, "rewards/margins": 0.017140626907348633, "rewards/rejected": -0.049383290112018585, "rewards/safe_rewards": -0.0425431951880455, "rewards/unsafe_rewards": -0.04406473785638809, "step": 1170 }, { "epoch": 0.25, "learning_rate": 4.6479636954456663e-07, "logits/chosen": -1.5313076972961426, "logits/rejected": -1.6694828271865845, "logps/chosen": -181.3092803955078, "logps/rejected": -152.27700805664062, "loss": 4611.6871, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.030405590310692787, "rewards/margins": 0.025212962180376053, "rewards/rejected": -0.05561854690313339, "rewards/safe_rewards": -0.03820235654711723, "rewards/unsafe_rewards": -0.03560890629887581, "step": 1180 }, { "epoch": 0.26, "learning_rate": 4.6382938963865305e-07, "logits/chosen": -1.4935702085494995, "logits/rejected": -1.6226661205291748, "logps/chosen": -185.72010803222656, "logps/rejected": -163.95388793945312, "loss": 4572.1449, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03052748180925846, "rewards/margins": 0.026869025081396103, "rewards/rejected": -0.057396501302719116, "rewards/safe_rewards": -0.025897648185491562, "rewards/unsafe_rewards": -0.024454237893223763, "step": 1190 }, { "epoch": 0.26, "learning_rate": 4.628503427958227e-07, "logits/chosen": -1.5274089574813843, "logits/rejected": -1.6752698421478271, "logps/chosen": -183.65216064453125, "logps/rejected": -157.87374877929688, "loss": 4372.7023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.029454613104462624, "rewards/margins": 0.023634344339370728, "rewards/rejected": -0.0530889555811882, "rewards/safe_rewards": -0.02984190359711647, "rewards/unsafe_rewards": -0.026918217539787292, "step": 1200 }, { "epoch": 0.26, "learning_rate": 4.6185928426618183e-07, "logits/chosen": -1.5541253089904785, "logits/rejected": -1.6651548147201538, "logps/chosen": -183.88851928710938, "logps/rejected": -158.6827392578125, "loss": 4606.8316, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04269189015030861, "rewards/margins": 0.020082702860236168, "rewards/rejected": -0.06277459114789963, "rewards/safe_rewards": -0.040744148194789886, "rewards/unsafe_rewards": -0.05241793394088745, "step": 1210 }, { "epoch": 0.26, "learning_rate": 4.608562699776867e-07, "logits/chosen": -1.5234718322753906, "logits/rejected": -1.6588599681854248, "logps/chosen": -185.85240173339844, "logps/rejected": -156.0950164794922, "loss": 4399.4789, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04641484469175339, "rewards/margins": 0.025498170405626297, "rewards/rejected": -0.07191301882266998, "rewards/safe_rewards": -0.04427632316946983, "rewards/unsafe_rewards": -0.04633382707834244, "step": 1220 }, { "epoch": 0.26, "learning_rate": 4.598413565329875e-07, "logits/chosen": -1.570145606994629, "logits/rejected": -1.709363341331482, "logps/chosen": -188.9373321533203, "logps/rejected": -156.55392456054688, "loss": 4674.7906, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.045936621725559235, "rewards/margins": 0.024780260398983955, "rewards/rejected": -0.07071688026189804, "rewards/safe_rewards": -0.03641568124294281, "rewards/unsafe_rewards": -0.041964758187532425, "step": 1230 }, { "epoch": 0.27, "learning_rate": 4.588146012062342e-07, "logits/chosen": -1.5220743417739868, "logits/rejected": -1.710331916809082, "logps/chosen": -190.99609375, "logps/rejected": -156.50909423828125, "loss": 4775.6926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.045392610132694244, "rewards/margins": 0.02367391251027584, "rewards/rejected": -0.06906651705503464, "rewards/safe_rewards": -0.04351535439491272, "rewards/unsafe_rewards": -0.05123267322778702, "step": 1240 }, { "epoch": 0.27, "learning_rate": 4.5777606193984406e-07, "logits/chosen": -1.5363917350769043, "logits/rejected": -1.6909393072128296, "logps/chosen": -179.78929138183594, "logps/rejected": -150.3070068359375, "loss": 4606.2188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04535425826907158, "rewards/margins": 0.028326651081442833, "rewards/rejected": -0.07368091493844986, "rewards/safe_rewards": -0.04354425147175789, "rewards/unsafe_rewards": -0.0453382208943367, "step": 1250 }, { "epoch": 0.27, "learning_rate": 4.5672579734123256e-07, "logits/chosen": -1.544589877128601, "logits/rejected": -1.6744540929794312, "logps/chosen": -179.40481567382812, "logps/rejected": -153.87533569335938, "loss": 4584.5363, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0473867766559124, "rewards/margins": 0.027626436203718185, "rewards/rejected": -0.07501321285963058, "rewards/safe_rewards": -0.0577394962310791, "rewards/unsafe_rewards": -0.04357898607850075, "step": 1260 }, { "epoch": 0.27, "learning_rate": 4.5566386667950496e-07, "logits/chosen": -1.5377792119979858, "logits/rejected": -1.6929556131362915, "logps/chosen": -186.7716522216797, "logps/rejected": -159.73703002929688, "loss": 4398.9934, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.045682501047849655, "rewards/margins": 0.02606990933418274, "rewards/rejected": -0.07175241410732269, "rewards/safe_rewards": -0.04259079694747925, "rewards/unsafe_rewards": -0.043846528977155685, "step": 1270 }, { "epoch": 0.28, "learning_rate": 4.545903298821125e-07, "logits/chosen": -1.5290443897247314, "logits/rejected": -1.7350711822509766, "logps/chosen": -183.42015075683594, "logps/rejected": -147.2918243408203, "loss": 4457.0336, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04515897482633591, "rewards/margins": 0.02763000689446926, "rewards/rejected": -0.07278897613286972, "rewards/safe_rewards": -0.041426774114370346, "rewards/unsafe_rewards": -0.039675381034612656, "step": 1280 }, { "epoch": 0.28, "learning_rate": 4.5350524753147025e-07, "logits/chosen": -1.5030237436294556, "logits/rejected": -1.6855010986328125, "logps/chosen": -187.16676330566406, "logps/rejected": -153.16978454589844, "loss": 4671.4844, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.05055118724703789, "rewards/margins": 0.0321933813393116, "rewards/rejected": -0.08274456858634949, "rewards/safe_rewards": -0.05530065298080444, "rewards/unsafe_rewards": -0.05532488226890564, "step": 1290 }, { "epoch": 0.28, "learning_rate": 4.5240868086153795e-07, "logits/chosen": -1.4921553134918213, "logits/rejected": -1.6758975982666016, "logps/chosen": -183.57421875, "logps/rejected": -157.46127319335938, "loss": 4554.543, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.052961546927690506, "rewards/margins": 0.029907744377851486, "rewards/rejected": -0.0828692764043808, "rewards/safe_rewards": -0.04120653495192528, "rewards/unsafe_rewards": -0.05241749435663223, "step": 1300 }, { "epoch": 0.28, "learning_rate": 4.5130069175436494e-07, "logits/chosen": -1.5371856689453125, "logits/rejected": -1.6823320388793945, "logps/chosen": -181.1886749267578, "logps/rejected": -153.98573303222656, "loss": 4326.823, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.05967335030436516, "rewards/margins": 0.02600882016122341, "rewards/rejected": -0.08568216860294342, "rewards/safe_rewards": -0.05673162266612053, "rewards/unsafe_rewards": -0.06618614494800568, "step": 1310 }, { "epoch": 0.28, "learning_rate": 4.501813427365978e-07, "logits/chosen": -1.5077297687530518, "logits/rejected": -1.6856625080108643, "logps/chosen": -194.92250061035156, "logps/rejected": -164.2276611328125, "loss": 4552.0352, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.05745963007211685, "rewards/margins": 0.025643443688750267, "rewards/rejected": -0.08310307562351227, "rewards/safe_rewards": -0.047545842826366425, "rewards/unsafe_rewards": -0.06003541871905327, "step": 1320 }, { "epoch": 0.29, "learning_rate": 4.490506969759517e-07, "logits/chosen": -1.501695990562439, "logits/rejected": -1.6484177112579346, "logps/chosen": -191.25283813476562, "logps/rejected": -156.14901733398438, "loss": 4645.6582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06024438887834549, "rewards/margins": 0.030391409993171692, "rewards/rejected": -0.09063579887151718, "rewards/safe_rewards": -0.03999023884534836, "rewards/unsafe_rewards": -0.059260934591293335, "step": 1330 }, { "epoch": 0.29, "learning_rate": 4.479088182776457e-07, "logits/chosen": -1.5254803895950317, "logits/rejected": -1.7227590084075928, "logps/chosen": -184.9578094482422, "logps/rejected": -150.9479522705078, "loss": 4464.598, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0670577734708786, "rewards/margins": 0.026129553094506264, "rewards/rejected": -0.09318731725215912, "rewards/safe_rewards": -0.06695044040679932, "rewards/unsafe_rewards": -0.07580827921628952, "step": 1340 }, { "epoch": 0.29, "learning_rate": 4.467557710808024e-07, "logits/chosen": -1.482630968093872, "logits/rejected": -1.6299711465835571, "logps/chosen": -198.63963317871094, "logps/rejected": -166.2589874267578, "loss": 4559.275, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.06899340450763702, "rewards/margins": 0.01892169378697872, "rewards/rejected": -0.08791510760784149, "rewards/safe_rewards": -0.05772694945335388, "rewards/unsafe_rewards": -0.07371903210878372, "step": 1350 }, { "epoch": 0.29, "learning_rate": 4.455916204548109e-07, "logits/chosen": -1.4690889120101929, "logits/rejected": -1.6367651224136353, "logps/chosen": -198.53579711914062, "logps/rejected": -165.66578674316406, "loss": 4687.2605, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.0653933435678482, "rewards/margins": 0.02982945181429386, "rewards/rejected": -0.09522279351949692, "rewards/safe_rewards": -0.06350763142108917, "rewards/unsafe_rewards": -0.05576132982969284, "step": 1360 }, { "epoch": 0.29, "learning_rate": 4.4441643209565536e-07, "logits/chosen": -1.5278400182724, "logits/rejected": -1.6557886600494385, "logps/chosen": -184.01242065429688, "logps/rejected": -158.33523559570312, "loss": 4743.0516, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07305346429347992, "rewards/margins": 0.03201254457235336, "rewards/rejected": -0.10506601631641388, "rewards/safe_rewards": -0.07768633961677551, "rewards/unsafe_rewards": -0.06968124210834503, "step": 1370 }, { "epoch": 0.3, "learning_rate": 4.4323027232220716e-07, "logits/chosen": -1.5573253631591797, "logits/rejected": -1.7018846273422241, "logps/chosen": -184.21664428710938, "logps/rejected": -154.37911987304688, "loss": 4865.0086, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06416310369968414, "rewards/margins": 0.03410537913441658, "rewards/rejected": -0.09826847910881042, "rewards/safe_rewards": -0.06994439661502838, "rewards/unsafe_rewards": -0.07079917937517166, "step": 1380 }, { "epoch": 0.3, "learning_rate": 4.4203320807248245e-07, "logits/chosen": -1.4774253368377686, "logits/rejected": -1.6499392986297607, "logps/chosen": -196.40798950195312, "logps/rejected": -165.3723907470703, "loss": 5059.0922, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0761256217956543, "rewards/margins": 0.019815731793642044, "rewards/rejected": -0.09594134241342545, "rewards/safe_rewards": -0.0792849063873291, "rewards/unsafe_rewards": -0.08509591221809387, "step": 1390 }, { "epoch": 0.3, "learning_rate": 4.4082530689986496e-07, "logits/chosen": -1.5042283535003662, "logits/rejected": -1.6591205596923828, "logps/chosen": -179.6789093017578, "logps/rejected": -155.26712036132812, "loss": 4877.0633, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.07687252759933472, "rewards/margins": 0.02355758287012577, "rewards/rejected": -0.10043010860681534, "rewards/safe_rewards": -0.07183260470628738, "rewards/unsafe_rewards": -0.0753275528550148, "step": 1400 }, { "epoch": 0.3, "learning_rate": 4.396066369692934e-07, "logits/chosen": -1.538514494895935, "logits/rejected": -1.6654468774795532, "logps/chosen": -189.87562561035156, "logps/rejected": -158.88401794433594, "loss": 4389.8062, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07384704053401947, "rewards/margins": 0.027946293354034424, "rewards/rejected": -0.1017933338880539, "rewards/safe_rewards": -0.08303812146186829, "rewards/unsafe_rewards": -0.08032767474651337, "step": 1410 }, { "epoch": 0.31, "learning_rate": 4.3837726705341493e-07, "logits/chosen": -1.517460584640503, "logits/rejected": -1.685102105140686, "logps/chosen": -190.13150024414062, "logps/rejected": -157.09078979492188, "loss": 4930.2273, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0846753641963005, "rewards/margins": 0.03842518478631973, "rewards/rejected": -0.12310054153203964, "rewards/safe_rewards": -0.08446918427944183, "rewards/unsafe_rewards": -0.08879521489143372, "step": 1420 }, { "epoch": 0.31, "learning_rate": 4.371372665287043e-07, "logits/chosen": -1.5377658605575562, "logits/rejected": -1.6732571125030518, "logps/chosen": -191.9783172607422, "logps/rejected": -160.6901397705078, "loss": 4816.6336, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08434223383665085, "rewards/margins": 0.03280803561210632, "rewards/rejected": -0.11715026199817657, "rewards/safe_rewards": -0.09200533479452133, "rewards/unsafe_rewards": -0.08296281844377518, "step": 1430 }, { "epoch": 0.31, "learning_rate": 4.3588670537154833e-07, "logits/chosen": -1.4955613613128662, "logits/rejected": -1.6576513051986694, "logps/chosen": -193.10189819335938, "logps/rejected": -163.51341247558594, "loss": 4855.6992, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0829058289527893, "rewards/margins": 0.027028951793909073, "rewards/rejected": -0.10993478447198868, "rewards/safe_rewards": -0.08236806094646454, "rewards/unsafe_rewards": -0.07732062041759491, "step": 1440 }, { "epoch": 0.31, "learning_rate": 4.346256541542974e-07, "logits/chosen": -1.5415652990341187, "logits/rejected": -1.6965878009796143, "logps/chosen": -190.3724365234375, "logps/rejected": -159.88287353515625, "loss": 4546.5988, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08694444596767426, "rewards/margins": 0.03067905642092228, "rewards/rejected": -0.11762350797653198, "rewards/safe_rewards": -0.08967327326536179, "rewards/unsafe_rewards": -0.07509349286556244, "step": 1450 }, { "epoch": 0.31, "learning_rate": 4.3335418404128257e-07, "logits/chosen": -1.5234099626541138, "logits/rejected": -1.7203166484832764, "logps/chosen": -185.0322723388672, "logps/rejected": -157.22264099121094, "loss": 4852.7863, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09016205370426178, "rewards/margins": 0.02999534085392952, "rewards/rejected": -0.1201573833823204, "rewards/safe_rewards": -0.09936000406742096, "rewards/unsafe_rewards": -0.09764011204242706, "step": 1460 }, { "epoch": 0.32, "learning_rate": 4.3207236678479983e-07, "logits/chosen": -1.5029499530792236, "logits/rejected": -1.6917537450790405, "logps/chosen": -195.02101135253906, "logps/rejected": -167.59982299804688, "loss": 4489.4875, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08811898529529572, "rewards/margins": 0.03717357665300369, "rewards/rejected": -0.12529256939888, "rewards/safe_rewards": -0.09929363429546356, "rewards/unsafe_rewards": -0.09525196254253387, "step": 1470 }, { "epoch": 0.32, "learning_rate": 4.3078027472106076e-07, "logits/chosen": -1.5581161975860596, "logits/rejected": -1.6952660083770752, "logps/chosen": -188.45236206054688, "logps/rejected": -159.8733367919922, "loss": 4552.6117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08648441731929779, "rewards/margins": 0.03525042533874512, "rewards/rejected": -0.1217348575592041, "rewards/safe_rewards": -0.08927708119153976, "rewards/unsafe_rewards": -0.08929868042469025, "step": 1480 }, { "epoch": 0.32, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -1.5243083238601685, "logits/rejected": -1.7080532312393188, "logps/chosen": -190.7737274169922, "logps/rejected": -156.8523712158203, "loss": 4718.8523, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0936952605843544, "rewards/margins": 0.03586844354867935, "rewards/rejected": -0.12956370413303375, "rewards/safe_rewards": -0.0916556864976883, "rewards/unsafe_rewards": -0.09692257642745972, "step": 1490 }, { "epoch": 0.32, "learning_rate": 4.2816555841171273e-07, "logits/chosen": -1.528688669204712, "logits/rejected": -1.6605732440948486, "logps/chosen": -193.24349975585938, "logps/rejected": -162.99972534179688, "loss": 4814.1586, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10096482187509537, "rewards/margins": 0.03367628902196884, "rewards/rejected": -0.1346411257982254, "rewards/safe_rewards": -0.09196919202804565, "rewards/unsafe_rewards": -0.09098034352064133, "step": 1500 }, { "epoch": 0.32, "eval_logits/chosen": -1.5580991506576538, "eval_logits/rejected": -1.708475112915039, "eval_logps/chosen": -189.93003845214844, "eval_logps/rejected": -161.3236541748047, "eval_loss": 4783.4697265625, "eval_rewards/accuracies": 0.656582772731781, "eval_rewards/chosen": -0.10113979876041412, "eval_rewards/margins": 0.028629325330257416, "eval_rewards/rejected": -0.12976910173892975, "eval_rewards/safe_rewards": -0.1002892255783081, "eval_rewards/unsafe_rewards": -0.10089249163866043, "eval_runtime": 1107.0, "eval_samples_per_second": 29.85, "eval_steps_per_second": 0.933, "step": 1500 }, { "epoch": 0.32, "learning_rate": 4.268430817212029e-07, "logits/chosen": -1.537824273109436, "logits/rejected": -1.6589075326919556, "logps/chosen": -189.6312255859375, "logps/rejected": -163.50103759765625, "loss": 4408.7914, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09998024255037308, "rewards/margins": 0.02846408262848854, "rewards/rejected": -0.12844432890415192, "rewards/safe_rewards": -0.1154317706823349, "rewards/unsafe_rewards": -0.11080650985240936, "step": 1510 }, { "epoch": 0.33, "learning_rate": 4.255106253253078e-07, "logits/chosen": -1.5759140253067017, "logits/rejected": -1.666741132736206, "logps/chosen": -193.58978271484375, "logps/rejected": -168.98110961914062, "loss": 4762.4102, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.09497286379337311, "rewards/margins": 0.019975414499640465, "rewards/rejected": -0.11494828760623932, "rewards/safe_rewards": -0.10499858856201172, "rewards/unsafe_rewards": -0.10591202974319458, "step": 1520 }, { "epoch": 0.33, "learning_rate": 4.2416826441793465e-07, "logits/chosen": -1.5389692783355713, "logits/rejected": -1.6409199237823486, "logps/chosen": -191.65660095214844, "logps/rejected": -168.95225524902344, "loss": 4751.0453, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1127411276102066, "rewards/margins": 0.025328587740659714, "rewards/rejected": -0.13806971907615662, "rewards/safe_rewards": -0.12170658260583878, "rewards/unsafe_rewards": -0.11861193180084229, "step": 1530 }, { "epoch": 0.33, "learning_rate": 4.2281607475192737e-07, "logits/chosen": -1.5130434036254883, "logits/rejected": -1.67864990234375, "logps/chosen": -190.14700317382812, "logps/rejected": -162.35897827148438, "loss": 4975.4203, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.11146162450313568, "rewards/margins": 0.03165808320045471, "rewards/rejected": -0.1431197077035904, "rewards/safe_rewards": -0.1180088147521019, "rewards/unsafe_rewards": -0.10830843448638916, "step": 1540 }, { "epoch": 0.33, "learning_rate": 4.214541326347919e-07, "logits/chosen": -1.4666296243667603, "logits/rejected": -1.6825758218765259, "logps/chosen": -193.5967559814453, "logps/rejected": -161.01187133789062, "loss": 4966.0352, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10996143519878387, "rewards/margins": 0.03255858272314072, "rewards/rejected": -0.142520010471344, "rewards/safe_rewards": -0.10145650058984756, "rewards/unsafe_rewards": -0.10628316551446915, "step": 1550 }, { "epoch": 0.34, "learning_rate": 4.200825149243896e-07, "logits/chosen": -1.4932947158813477, "logits/rejected": -1.6381498575210571, "logps/chosen": -199.1292266845703, "logps/rejected": -174.4131317138672, "loss": 4517.393, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.10837070643901825, "rewards/margins": 0.02493271790444851, "rewards/rejected": -0.1333034336566925, "rewards/safe_rewards": -0.09819348901510239, "rewards/unsafe_rewards": -0.1054019182920456, "step": 1560 }, { "epoch": 0.34, "learning_rate": 4.187012990246005e-07, "logits/chosen": -1.4683187007904053, "logits/rejected": -1.6500478982925415, "logps/chosen": -190.84954833984375, "logps/rejected": -160.42123413085938, "loss": 4681.8289, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.10673446953296661, "rewards/margins": 0.03077591583132744, "rewards/rejected": -0.13751038908958435, "rewards/safe_rewards": -0.10187576711177826, "rewards/unsafe_rewards": -0.11082375049591064, "step": 1570 }, { "epoch": 0.34, "learning_rate": 4.1731056288095455e-07, "logits/chosen": -1.5628395080566406, "logits/rejected": -1.6778180599212646, "logps/chosen": -183.80006408691406, "logps/rejected": -164.409423828125, "loss": 4883.7266, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11580002307891846, "rewards/margins": 0.01965830847620964, "rewards/rejected": -0.1354583203792572, "rewards/safe_rewards": -0.11279549449682236, "rewards/unsafe_rewards": -0.11630165576934814, "step": 1580 }, { "epoch": 0.34, "learning_rate": 4.159103849762337e-07, "logits/chosen": -1.499773383140564, "logits/rejected": -1.6635452508926392, "logps/chosen": -195.1200714111328, "logps/rejected": -171.11439514160156, "loss": 4673.232, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.10300751030445099, "rewards/margins": 0.03463372588157654, "rewards/rejected": -0.13764122128486633, "rewards/safe_rewards": -0.11106812953948975, "rewards/unsafe_rewards": -0.10607589781284332, "step": 1590 }, { "epoch": 0.34, "learning_rate": 4.1450084432604194e-07, "logits/chosen": -1.5234777927398682, "logits/rejected": -1.682011604309082, "logps/chosen": -189.66554260253906, "logps/rejected": -161.44068908691406, "loss": 4635.1934, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.10913839191198349, "rewards/margins": 0.04073639586567879, "rewards/rejected": -0.14987480640411377, "rewards/safe_rewards": -0.11047736555337906, "rewards/unsafe_rewards": -0.11316169798374176, "step": 1600 }, { "epoch": 0.35, "learning_rate": 4.130820204743474e-07, "logits/chosen": -1.5359930992126465, "logits/rejected": -1.7265965938568115, "logps/chosen": -191.86297607421875, "logps/rejected": -154.70101928710938, "loss": 4572.8258, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11838221549987793, "rewards/margins": 0.04143286123871803, "rewards/rejected": -0.15981508791446686, "rewards/safe_rewards": -0.12264015525579453, "rewards/unsafe_rewards": -0.11922023445367813, "step": 1610 }, { "epoch": 0.35, "learning_rate": 4.1165399348899274e-07, "logits/chosen": -1.4998595714569092, "logits/rejected": -1.6738866567611694, "logps/chosen": -192.2812042236328, "logps/rejected": -164.16021728515625, "loss": 4321.375, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.12320941686630249, "rewards/margins": 0.022266935557127, "rewards/rejected": -0.1454763561487198, "rewards/safe_rewards": -0.12465524673461914, "rewards/unsafe_rewards": -0.12423654645681381, "step": 1620 }, { "epoch": 0.35, "learning_rate": 4.1021684395717684e-07, "logits/chosen": -1.5246667861938477, "logits/rejected": -1.6726423501968384, "logps/chosen": -184.88677978515625, "logps/rejected": -162.3362579345703, "loss": 5018.5824, "rewards/accuracies": 0.625, "rewards/chosen": -0.11846532672643661, "rewards/margins": 0.028513198718428612, "rewards/rejected": -0.14697852730751038, "rewards/safe_rewards": -0.12438063323497772, "rewards/unsafe_rewards": -0.1294136494398117, "step": 1630 }, { "epoch": 0.35, "learning_rate": 4.0877065298090683e-07, "logits/chosen": -1.4848709106445312, "logits/rejected": -1.6439940929412842, "logps/chosen": -192.70907592773438, "logps/rejected": -164.2242889404297, "loss": 4591.6363, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11669857800006866, "rewards/margins": 0.028948958963155746, "rewards/rejected": -0.14564752578735352, "rewards/safe_rewards": -0.12119156122207642, "rewards/unsafe_rewards": -0.12438607215881348, "step": 1640 }, { "epoch": 0.36, "learning_rate": 4.0731550217242215e-07, "logits/chosen": -1.4997413158416748, "logits/rejected": -1.6710023880004883, "logps/chosen": -194.86123657226562, "logps/rejected": -164.29501342773438, "loss": 5333.068, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.11901208013296127, "rewards/margins": 0.03771593049168587, "rewards/rejected": -0.15672799944877625, "rewards/safe_rewards": -0.12273415178060532, "rewards/unsafe_rewards": -0.10976684093475342, "step": 1650 }, { "epoch": 0.36, "learning_rate": 4.058514736495879e-07, "logits/chosen": -1.563960313796997, "logits/rejected": -1.6570947170257568, "logps/chosen": -192.50552368164062, "logps/rejected": -162.29861450195312, "loss": 4640.0223, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13349397480487823, "rewards/margins": 0.025972384959459305, "rewards/rejected": -0.15946635603904724, "rewards/safe_rewards": -0.11222796142101288, "rewards/unsafe_rewards": -0.11878158152103424, "step": 1660 }, { "epoch": 0.36, "learning_rate": 4.043786500312615e-07, "logits/chosen": -1.5279300212860107, "logits/rejected": -1.6687933206558228, "logps/chosen": -191.20932006835938, "logps/rejected": -163.1149444580078, "loss": 4378.8211, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.12452849000692368, "rewards/margins": 0.03482457250356674, "rewards/rejected": -0.1593530774116516, "rewards/safe_rewards": -0.1205214262008667, "rewards/unsafe_rewards": -0.11533038318157196, "step": 1670 }, { "epoch": 0.36, "learning_rate": 4.0289711443262976e-07, "logits/chosen": -1.5054595470428467, "logits/rejected": -1.633215308189392, "logps/chosen": -194.52597045898438, "logps/rejected": -171.93157958984375, "loss": 4583.4391, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12258823961019516, "rewards/margins": 0.030542481690645218, "rewards/rejected": -0.15313073992729187, "rewards/safe_rewards": -0.11997132003307343, "rewards/unsafe_rewards": -0.11239048093557358, "step": 1680 }, { "epoch": 0.36, "learning_rate": 4.01406950460519e-07, "logits/chosen": -1.5208232402801514, "logits/rejected": -1.6477069854736328, "logps/chosen": -187.86428833007812, "logps/rejected": -164.40432739257812, "loss": 4545.9742, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.12054673582315445, "rewards/margins": 0.035888440907001495, "rewards/rejected": -0.15643519163131714, "rewards/safe_rewards": -0.10346541553735733, "rewards/unsafe_rewards": -0.10718226432800293, "step": 1690 }, { "epoch": 0.37, "learning_rate": 3.9990824220867627e-07, "logits/chosen": -1.5114307403564453, "logits/rejected": -1.6504039764404297, "logps/chosen": -199.4632110595703, "logps/rejected": -165.00100708007812, "loss": 4740.1059, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11757256835699081, "rewards/margins": 0.03785143420100212, "rewards/rejected": -0.15542401373386383, "rewards/safe_rewards": -0.11829550564289093, "rewards/unsafe_rewards": -0.10830708593130112, "step": 1700 }, { "epoch": 0.37, "learning_rate": 3.9840107425302436e-07, "logits/chosen": -1.5149147510528564, "logits/rejected": -1.6344457864761353, "logps/chosen": -195.26638793945312, "logps/rejected": -173.31674194335938, "loss": 4818.1281, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12749747931957245, "rewards/margins": 0.03043445385992527, "rewards/rejected": -0.15793193876743317, "rewards/safe_rewards": -0.12672582268714905, "rewards/unsafe_rewards": -0.1338866651058197, "step": 1710 }, { "epoch": 0.37, "learning_rate": 3.9688553164688874e-07, "logits/chosen": -1.512686848640442, "logits/rejected": -1.5722835063934326, "logps/chosen": -191.93997192382812, "logps/rejected": -171.3597412109375, "loss": 4481.0012, "rewards/accuracies": 0.625, "rewards/chosen": -0.13480202853679657, "rewards/margins": 0.025449225679039955, "rewards/rejected": -0.16025125980377197, "rewards/safe_rewards": -0.13317185640335083, "rewards/unsafe_rewards": -0.13154098391532898, "step": 1720 }, { "epoch": 0.37, "learning_rate": 3.9536169991619746e-07, "logits/chosen": -1.5209314823150635, "logits/rejected": -1.6655257940292358, "logps/chosen": -196.43832397460938, "logps/rejected": -171.23483276367188, "loss": 5011.9023, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.131813645362854, "rewards/margins": 0.03495827689766884, "rewards/rejected": -0.16677191853523254, "rewards/safe_rewards": -0.12982752919197083, "rewards/unsafe_rewards": -0.14838407933712006, "step": 1730 }, { "epoch": 0.37, "learning_rate": 3.9382966505465515e-07, "logits/chosen": -1.5155845880508423, "logits/rejected": -1.6723324060440063, "logps/chosen": -194.37078857421875, "logps/rejected": -160.597412109375, "loss": 4450.7453, "rewards/accuracies": 0.6875, "rewards/chosen": -0.12863586843013763, "rewards/margins": 0.03935997188091278, "rewards/rejected": -0.16799584031105042, "rewards/safe_rewards": -0.13851897418498993, "rewards/unsafe_rewards": -0.14416223764419556, "step": 1740 }, { "epoch": 0.38, "learning_rate": 3.9228951351888996e-07, "logits/chosen": -1.5099736452102661, "logits/rejected": -1.6989933252334595, "logps/chosen": -207.7913818359375, "logps/rejected": -168.32443237304688, "loss": 4776.8762, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12218910455703735, "rewards/margins": 0.0447901152074337, "rewards/rejected": -0.16697920858860016, "rewards/safe_rewards": -0.1346699446439743, "rewards/unsafe_rewards": -0.12773606181144714, "step": 1750 }, { "epoch": 0.38, "learning_rate": 3.9074133222357464e-07, "logits/chosen": -1.5252752304077148, "logits/rejected": -1.6725393533706665, "logps/chosen": -191.07492065429688, "logps/rejected": -160.1776123046875, "loss": 4731.5121, "rewards/accuracies": 0.65625, "rewards/chosen": -0.12712064385414124, "rewards/margins": 0.036726612597703934, "rewards/rejected": -0.16384726762771606, "rewards/safe_rewards": -0.13120092451572418, "rewards/unsafe_rewards": -0.13385803997516632, "step": 1760 }, { "epoch": 0.38, "learning_rate": 3.891852085365217e-07, "logits/chosen": -1.5118917226791382, "logits/rejected": -1.7031481266021729, "logps/chosen": -196.19244384765625, "logps/rejected": -163.33419799804688, "loss": 4737.4328, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14053121209144592, "rewards/margins": 0.034223563969135284, "rewards/rejected": -0.1747547686100006, "rewards/safe_rewards": -0.12711220979690552, "rewards/unsafe_rewards": -0.12155942618846893, "step": 1770 }, { "epoch": 0.38, "learning_rate": 3.876212302737531e-07, "logits/chosen": -1.513858437538147, "logits/rejected": -1.6616404056549072, "logps/chosen": -191.3935546875, "logps/rejected": -166.4324493408203, "loss": 4720.8039, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12330826371908188, "rewards/margins": 0.043748270720243454, "rewards/rejected": -0.16705651581287384, "rewards/safe_rewards": -0.10936135053634644, "rewards/unsafe_rewards": -0.11604505777359009, "step": 1780 }, { "epoch": 0.39, "learning_rate": 3.8604948569454444e-07, "logits/chosen": -1.565945029258728, "logits/rejected": -1.6589988470077515, "logps/chosen": -187.71319580078125, "logps/rejected": -165.60409545898438, "loss": 4543.766, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.15425968170166016, "rewards/margins": 0.015469158999621868, "rewards/rejected": -0.1697288453578949, "rewards/safe_rewards": -0.16217832267284393, "rewards/unsafe_rewards": -0.15482516586780548, "step": 1790 }, { "epoch": 0.39, "learning_rate": 3.8447006349644433e-07, "logits/chosen": -1.5546067953109741, "logits/rejected": -1.6827272176742554, "logps/chosen": -192.89273071289062, "logps/rejected": -165.50807189941406, "loss": 4875.3945, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.13487322628498077, "rewards/margins": 0.044957052916288376, "rewards/rejected": -0.17983028292655945, "rewards/safe_rewards": -0.13276848196983337, "rewards/unsafe_rewards": -0.1317722052335739, "step": 1800 }, { "epoch": 0.39, "learning_rate": 3.82883052810269e-07, "logits/chosen": -1.5468754768371582, "logits/rejected": -1.737156629562378, "logps/chosen": -193.13771057128906, "logps/rejected": -161.40933227539062, "loss": 4535.4102, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1451563835144043, "rewards/margins": 0.032006919384002686, "rewards/rejected": -0.17716331779956818, "rewards/safe_rewards": -0.14445580542087555, "rewards/unsafe_rewards": -0.15693026781082153, "step": 1810 }, { "epoch": 0.39, "learning_rate": 3.8128854319507233e-07, "logits/chosen": -1.489431619644165, "logits/rejected": -1.6298091411590576, "logps/chosen": -199.9399871826172, "logps/rejected": -169.14071655273438, "loss": 4629.3816, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.1381499022245407, "rewards/margins": 0.026352811604738235, "rewards/rejected": -0.16450272500514984, "rewards/safe_rewards": -0.13834060728549957, "rewards/unsafe_rewards": -0.14610829949378967, "step": 1820 }, { "epoch": 0.39, "learning_rate": 3.79686624633092e-07, "logits/chosen": -1.5629639625549316, "logits/rejected": -1.673583984375, "logps/chosen": -188.42776489257812, "logps/rejected": -164.52830505371094, "loss": 4371.0785, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1417965292930603, "rewards/margins": 0.026078829541802406, "rewards/rejected": -0.16787537932395935, "rewards/safe_rewards": -0.13163700699806213, "rewards/unsafe_rewards": -0.1251065582036972, "step": 1830 }, { "epoch": 0.4, "learning_rate": 3.780773875246712e-07, "logits/chosen": -1.4871420860290527, "logits/rejected": -1.6644290685653687, "logps/chosen": -189.71253967285156, "logps/rejected": -164.8614959716797, "loss": 4514.3793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1482163518667221, "rewards/margins": 0.034077826887369156, "rewards/rejected": -0.18229417502880096, "rewards/safe_rewards": -0.1476258486509323, "rewards/unsafe_rewards": -0.15007683634757996, "step": 1840 }, { "epoch": 0.4, "learning_rate": 3.7646092268315757e-07, "logits/chosen": -1.519734501838684, "logits/rejected": -1.6761239767074585, "logps/chosen": -196.5961151123047, "logps/rejected": -167.58834838867188, "loss": 4514.7875, "rewards/accuracies": 0.65625, "rewards/chosen": -0.13910305500030518, "rewards/margins": 0.038643963634967804, "rewards/rejected": -0.17774701118469238, "rewards/safe_rewards": -0.14205802977085114, "rewards/unsafe_rewards": -0.13988126814365387, "step": 1850 }, { "epoch": 0.4, "learning_rate": 3.748373213297779e-07, "logits/chosen": -1.484991192817688, "logits/rejected": -1.6660099029541016, "logps/chosen": -202.67568969726562, "logps/rejected": -167.13687133789062, "loss": 4564.5477, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13996842503547668, "rewards/margins": 0.049757130444049835, "rewards/rejected": -0.18972554802894592, "rewards/safe_rewards": -0.13505282998085022, "rewards/unsafe_rewards": -0.14128001034259796, "step": 1860 }, { "epoch": 0.4, "learning_rate": 3.7320667508849075e-07, "logits/chosen": -1.4829380512237549, "logits/rejected": -1.6173584461212158, "logps/chosen": -208.1465301513672, "logps/rejected": -177.58627319335938, "loss": 4408.2273, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.14004449546337128, "rewards/margins": 0.03407851979136467, "rewards/rejected": -0.17412300407886505, "rewards/safe_rewards": -0.15908434987068176, "rewards/unsafe_rewards": -0.13560417294502258, "step": 1870 }, { "epoch": 0.4, "learning_rate": 3.7156907598081536e-07, "logits/chosen": -1.5099645853042603, "logits/rejected": -1.7038726806640625, "logps/chosen": -185.73690795898438, "logps/rejected": -163.53038024902344, "loss": 4576.8039, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.14375685155391693, "rewards/margins": 0.03178495913743973, "rewards/rejected": -0.17554181814193726, "rewards/safe_rewards": -0.1412079632282257, "rewards/unsafe_rewards": -0.1479519158601761, "step": 1880 }, { "epoch": 0.41, "learning_rate": 3.6992461642063924e-07, "logits/chosen": -1.532083511352539, "logits/rejected": -1.653135061264038, "logps/chosen": -193.87269592285156, "logps/rejected": -170.20785522460938, "loss": 4644.7246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15579074621200562, "rewards/margins": 0.0315186008810997, "rewards/rejected": -0.1873093545436859, "rewards/safe_rewards": -0.16624827682971954, "rewards/unsafe_rewards": -0.16547468304634094, "step": 1890 }, { "epoch": 0.41, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -1.5281168222427368, "logits/rejected": -1.653794288635254, "logps/chosen": -193.35755920410156, "logps/rejected": -175.20355224609375, "loss": 4324.6402, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1601591855287552, "rewards/margins": 0.01697435975074768, "rewards/rejected": -0.17713356018066406, "rewards/safe_rewards": -0.16336555778980255, "rewards/unsafe_rewards": -0.16009892523288727, "step": 1900 }, { "epoch": 0.41, "learning_rate": 3.666154875288611e-07, "logits/chosen": -1.5120189189910889, "logits/rejected": -1.6923736333847046, "logps/chosen": -200.8025665283203, "logps/rejected": -168.51516723632812, "loss": 4529.6922, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.14737720787525177, "rewards/margins": 0.04636336863040924, "rewards/rejected": -0.193740576505661, "rewards/safe_rewards": -0.14131179451942444, "rewards/unsafe_rewards": -0.1441853940486908, "step": 1910 }, { "epoch": 0.41, "learning_rate": 3.6495100493982814e-07, "logits/chosen": -1.4914028644561768, "logits/rejected": -1.5870484113693237, "logps/chosen": -192.8271484375, "logps/rejected": -172.20733642578125, "loss": 4532.7613, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15324972569942474, "rewards/margins": 0.023919690400362015, "rewards/rejected": -0.17716941237449646, "rewards/safe_rewards": -0.14454595744609833, "rewards/unsafe_rewards": -0.14631310105323792, "step": 1920 }, { "epoch": 0.42, "learning_rate": 3.6328003537289453e-07, "logits/chosen": -1.5082820653915405, "logits/rejected": -1.6503303050994873, "logps/chosen": -199.08743286132812, "logps/rejected": -169.57693481445312, "loss": 4359.9492, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.15002259612083435, "rewards/margins": 0.034758009016513824, "rewards/rejected": -0.18478062748908997, "rewards/safe_rewards": -0.16475504636764526, "rewards/unsafe_rewards": -0.15786336362361908, "step": 1930 }, { "epoch": 0.42, "learning_rate": 3.616026731251273e-07, "logits/chosen": -1.528875470161438, "logits/rejected": -1.6688334941864014, "logps/chosen": -187.82205200195312, "logps/rejected": -167.4652099609375, "loss": 4359.3852, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.16956427693367004, "rewards/margins": 0.027880961075425148, "rewards/rejected": -0.19744522869586945, "rewards/safe_rewards": -0.16070137917995453, "rewards/unsafe_rewards": -0.15251697599887848, "step": 1940 }, { "epoch": 0.42, "learning_rate": 3.599190128543493e-07, "logits/chosen": -1.562877893447876, "logits/rejected": -1.7063143253326416, "logps/chosen": -187.72396850585938, "logps/rejected": -167.56739807128906, "loss": 4340.2844, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.1623212844133377, "rewards/margins": 0.022650117054581642, "rewards/rejected": -0.184971421957016, "rewards/safe_rewards": -0.15617766976356506, "rewards/unsafe_rewards": -0.1581607162952423, "step": 1950 }, { "epoch": 0.42, "learning_rate": 3.582291495737965e-07, "logits/chosen": -1.5327088832855225, "logits/rejected": -1.6926496028900146, "logps/chosen": -193.42005920410156, "logps/rejected": -165.14230346679688, "loss": 4540.7277, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.156415656208992, "rewards/margins": 0.050673335790634155, "rewards/rejected": -0.20708899199962616, "rewards/safe_rewards": -0.15579012036323547, "rewards/unsafe_rewards": -0.14954029023647308, "step": 1960 }, { "epoch": 0.42, "learning_rate": 3.5653317864675663e-07, "logits/chosen": -1.516349196434021, "logits/rejected": -1.6435381174087524, "logps/chosen": -197.13516235351562, "logps/rejected": -170.92176818847656, "loss": 4560.1402, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1390058994293213, "rewards/margins": 0.04474402964115143, "rewards/rejected": -0.18374991416931152, "rewards/safe_rewards": -0.13598977029323578, "rewards/unsafe_rewards": -0.1273638755083084, "step": 1970 }, { "epoch": 0.43, "learning_rate": 3.548311957811874e-07, "logits/chosen": -1.5319312810897827, "logits/rejected": -1.6552826166152954, "logps/chosen": -191.04995727539062, "logps/rejected": -167.7104034423828, "loss": 4285.241, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.16155274212360382, "rewards/margins": 0.039853036403656006, "rewards/rejected": -0.20140579342842102, "rewards/safe_rewards": -0.15936799347400665, "rewards/unsafe_rewards": -0.15766359865665436, "step": 1980 }, { "epoch": 0.43, "learning_rate": 3.531232970243156e-07, "logits/chosen": -1.5349454879760742, "logits/rejected": -1.6777547597885132, "logps/chosen": -193.56491088867188, "logps/rejected": -170.81886291503906, "loss": 4164.8742, "rewards/accuracies": 0.625, "rewards/chosen": -0.15647685527801514, "rewards/margins": 0.030176788568496704, "rewards/rejected": -0.18665364384651184, "rewards/safe_rewards": -0.17747744917869568, "rewards/unsafe_rewards": -0.15970292687416077, "step": 1990 }, { "epoch": 0.43, "learning_rate": 3.5140957875721675e-07, "logits/chosen": -1.505481243133545, "logits/rejected": -1.6597639322280884, "logps/chosen": -202.08641052246094, "logps/rejected": -167.3864288330078, "loss": 4693.2395, "rewards/accuracies": 0.65625, "rewards/chosen": -0.15188416838645935, "rewards/margins": 0.03849487379193306, "rewards/rejected": -0.1903790384531021, "rewards/safe_rewards": -0.1525164544582367, "rewards/unsafe_rewards": -0.16176187992095947, "step": 2000 }, { "epoch": 0.43, "eval_logits/chosen": -1.5598112344741821, "eval_logits/rejected": -1.7079558372497559, "eval_logps/chosen": -195.7834930419922, "eval_logps/rejected": -167.6018524169922, "eval_loss": 4735.19775390625, "eval_rewards/accuracies": 0.6479912996292114, "eval_rewards/chosen": -0.1596742570400238, "eval_rewards/margins": 0.0328766331076622, "eval_rewards/rejected": -0.1925508826971054, "eval_rewards/safe_rewards": -0.1583378165960312, "eval_rewards/unsafe_rewards": -0.15876443684101105, "eval_runtime": 1092.5338, "eval_samples_per_second": 30.245, "eval_steps_per_second": 0.946, "step": 2000 }, { "epoch": 0.43, "learning_rate": 3.4969013768937626e-07, "logits/chosen": -1.4935743808746338, "logits/rejected": -1.6208422183990479, "logps/chosen": -199.55955505371094, "logps/rejected": -173.50155639648438, "loss": 4606.9531, "rewards/accuracies": 0.6875, "rewards/chosen": -0.14804984629154205, "rewards/margins": 0.04323448985815048, "rewards/rejected": -0.19128432869911194, "rewards/safe_rewards": -0.1498142033815384, "rewards/unsafe_rewards": -0.16407233476638794, "step": 2010 }, { "epoch": 0.43, "learning_rate": 3.479650708532316e-07, "logits/chosen": -1.4788436889648438, "logits/rejected": -1.6816246509552002, "logps/chosen": -206.125, "logps/rejected": -171.91233825683594, "loss": 4369.9297, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.15619800984859467, "rewards/margins": 0.03503455966711044, "rewards/rejected": -0.1912325918674469, "rewards/safe_rewards": -0.15829236805438995, "rewards/unsafe_rewards": -0.15976662933826447, "step": 2020 }, { "epoch": 0.44, "learning_rate": 3.4623447559869684e-07, "logits/chosen": -1.5245215892791748, "logits/rejected": -1.67314875125885, "logps/chosen": -192.7067108154297, "logps/rejected": -168.93069458007812, "loss": 4728.2547, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.15722766518592834, "rewards/margins": 0.049727145582437515, "rewards/rejected": -0.20695483684539795, "rewards/safe_rewards": -0.16923712193965912, "rewards/unsafe_rewards": -0.17044946551322937, "step": 2030 }, { "epoch": 0.44, "learning_rate": 3.444984495876686e-07, "logits/chosen": -1.5146068334579468, "logits/rejected": -1.6882543563842773, "logps/chosen": -194.78768920898438, "logps/rejected": -167.3456268310547, "loss": 4657.6684, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.16134384274482727, "rewards/margins": 0.0346418097615242, "rewards/rejected": -0.19598564505577087, "rewards/safe_rewards": -0.16767169535160065, "rewards/unsafe_rewards": -0.16538113355636597, "step": 2040 }, { "epoch": 0.44, "learning_rate": 3.4275709078851505e-07, "logits/chosen": -1.522111177444458, "logits/rejected": -1.6580556631088257, "logps/chosen": -195.66676330566406, "logps/rejected": -166.88668823242188, "loss": 4460.3754, "rewards/accuracies": 0.71875, "rewards/chosen": -0.17603282630443573, "rewards/margins": 0.04090113937854767, "rewards/rejected": -0.2169339954853058, "rewards/safe_rewards": -0.16600750386714935, "rewards/unsafe_rewards": -0.1819375604391098, "step": 2050 }, { "epoch": 0.44, "learning_rate": 3.4101049747054714e-07, "logits/chosen": -1.5495026111602783, "logits/rejected": -1.6891149282455444, "logps/chosen": -196.8260040283203, "logps/rejected": -168.7765350341797, "loss": 4473.8492, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17186620831489563, "rewards/margins": 0.03999987989664078, "rewards/rejected": -0.211866095662117, "rewards/safe_rewards": -0.16877253353595734, "rewards/unsafe_rewards": -0.17390649020671844, "step": 2060 }, { "epoch": 0.45, "learning_rate": 3.392587681984731e-07, "logits/chosen": -1.527313470840454, "logits/rejected": -1.699136734008789, "logps/chosen": -192.0068817138672, "logps/rejected": -164.4221649169922, "loss": 4344.6438, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.18242517113685608, "rewards/margins": 0.025168150663375854, "rewards/rejected": -0.20759332180023193, "rewards/safe_rewards": -0.17724883556365967, "rewards/unsafe_rewards": -0.18775682151317596, "step": 2070 }, { "epoch": 0.45, "learning_rate": 3.375020018268359e-07, "logits/chosen": -1.480007529258728, "logits/rejected": -1.6100314855575562, "logps/chosen": -198.6400146484375, "logps/rejected": -170.59938049316406, "loss": 4839.418, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.16315604746341705, "rewards/margins": 0.03862619027495384, "rewards/rejected": -0.2017822563648224, "rewards/safe_rewards": -0.15883924067020416, "rewards/unsafe_rewards": -0.1559022217988968, "step": 2080 }, { "epoch": 0.45, "learning_rate": 3.357402974944352e-07, "logits/chosen": -1.5571858882904053, "logits/rejected": -1.6805274486541748, "logps/chosen": -199.99563598632812, "logps/rejected": -169.01881408691406, "loss": 4624.8039, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.16341353952884674, "rewards/margins": 0.036734528839588165, "rewards/rejected": -0.2001480758190155, "rewards/safe_rewards": -0.16912630200386047, "rewards/unsafe_rewards": -0.1761053055524826, "step": 2090 }, { "epoch": 0.45, "learning_rate": 3.339737546187321e-07, "logits/chosen": -1.4840567111968994, "logits/rejected": -1.6616199016571045, "logps/chosen": -206.9954376220703, "logps/rejected": -170.8988800048828, "loss": 4374.6125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16768932342529297, "rewards/margins": 0.04945260286331177, "rewards/rejected": -0.21714194118976593, "rewards/safe_rewards": -0.1729009449481964, "rewards/unsafe_rewards": -0.1853603720664978, "step": 2100 }, { "epoch": 0.45, "learning_rate": 3.3220247289023903e-07, "logits/chosen": -1.5535001754760742, "logits/rejected": -1.6747972965240479, "logps/chosen": -197.06094360351562, "logps/rejected": -166.65155029296875, "loss": 4667.6262, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.17255404591560364, "rewards/margins": 0.03279537707567215, "rewards/rejected": -0.20534944534301758, "rewards/safe_rewards": -0.16884677112102509, "rewards/unsafe_rewards": -0.17548829317092896, "step": 2110 }, { "epoch": 0.46, "learning_rate": 3.304265522668942e-07, "logits/chosen": -1.5219346284866333, "logits/rejected": -1.6688143014907837, "logps/chosen": -196.29928588867188, "logps/rejected": -168.91574096679688, "loss": 4285.018, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1843825876712799, "rewards/margins": 0.015963854268193245, "rewards/rejected": -0.2003464251756668, "rewards/safe_rewards": -0.19016426801681519, "rewards/unsafe_rewards": -0.2044658213853836, "step": 2120 }, { "epoch": 0.46, "learning_rate": 3.2864609296842016e-07, "logits/chosen": -1.5713613033294678, "logits/rejected": -1.694791555404663, "logps/chosen": -190.51687622070312, "logps/rejected": -169.47625732421875, "loss": 4580.3148, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.17123115062713623, "rewards/margins": 0.03402050584554672, "rewards/rejected": -0.20525164902210236, "rewards/safe_rewards": -0.17465582489967346, "rewards/unsafe_rewards": -0.18817821145057678, "step": 2130 }, { "epoch": 0.46, "learning_rate": 3.2686119547066873e-07, "logits/chosen": -1.5364387035369873, "logits/rejected": -1.6923913955688477, "logps/chosen": -198.44940185546875, "logps/rejected": -164.54745483398438, "loss": 4589.2234, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16616761684417725, "rewards/margins": 0.034110378473997116, "rewards/rejected": -0.20027799904346466, "rewards/safe_rewards": -0.16222570836544037, "rewards/unsafe_rewards": -0.17081135511398315, "step": 2140 }, { "epoch": 0.46, "learning_rate": 3.250719604999503e-07, "logits/chosen": -1.514459490776062, "logits/rejected": -1.6319299936294556, "logps/chosen": -194.6929931640625, "logps/rejected": -174.78775024414062, "loss": 4479.0449, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1816660463809967, "rewards/margins": 0.04073150455951691, "rewards/rejected": -0.22239753603935242, "rewards/safe_rewards": -0.16499900817871094, "rewards/unsafe_rewards": -0.16801717877388, "step": 2150 }, { "epoch": 0.46, "learning_rate": 3.232784890273501e-07, "logits/chosen": -1.4998911619186401, "logits/rejected": -1.6512269973754883, "logps/chosen": -205.2537384033203, "logps/rejected": -173.38986206054688, "loss": 4622.5875, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.18792471289634705, "rewards/margins": 0.027165651321411133, "rewards/rejected": -0.21509039402008057, "rewards/safe_rewards": -0.17774730920791626, "rewards/unsafe_rewards": -0.19432474672794342, "step": 2160 }, { "epoch": 0.47, "learning_rate": 3.2148088226303005e-07, "logits/chosen": -1.478515863418579, "logits/rejected": -1.600571632385254, "logps/chosen": -212.6832733154297, "logps/rejected": -179.21969604492188, "loss": 4216.7437, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.1678779125213623, "rewards/margins": 0.03318702429533005, "rewards/rejected": -0.20106497406959534, "rewards/safe_rewards": -0.152131587266922, "rewards/unsafe_rewards": -0.17872212827205658, "step": 2170 }, { "epoch": 0.47, "learning_rate": 3.19679241650517e-07, "logits/chosen": -1.5413628816604614, "logits/rejected": -1.6792118549346924, "logps/chosen": -197.38650512695312, "logps/rejected": -169.0177459716797, "loss": 4833.6258, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.17735476791858673, "rewards/margins": 0.03668416664004326, "rewards/rejected": -0.2140389382839203, "rewards/safe_rewards": -0.1584596484899521, "rewards/unsafe_rewards": -0.17684076726436615, "step": 2180 }, { "epoch": 0.47, "learning_rate": 3.178736688609779e-07, "logits/chosen": -1.5323039293289185, "logits/rejected": -1.6601394414901733, "logps/chosen": -192.86590576171875, "logps/rejected": -163.18594360351562, "loss": 5238.3188, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.18224409222602844, "rewards/margins": 0.03139295056462288, "rewards/rejected": -0.21363703906536102, "rewards/safe_rewards": -0.1784275770187378, "rewards/unsafe_rewards": -0.17896251380443573, "step": 2190 }, { "epoch": 0.47, "learning_rate": 3.160642657874828e-07, "logits/chosen": -1.5507875680923462, "logits/rejected": -1.6605165004730225, "logps/chosen": -197.36325073242188, "logps/rejected": -174.4957275390625, "loss": 4444.393, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17197445034980774, "rewards/margins": 0.031344201415777206, "rewards/rejected": -0.20331864058971405, "rewards/safe_rewards": -0.16311398148536682, "rewards/unsafe_rewards": -0.17147906124591827, "step": 2200 }, { "epoch": 0.48, "learning_rate": 3.1425113453925435e-07, "logits/chosen": -1.4743530750274658, "logits/rejected": -1.6496025323867798, "logps/chosen": -209.71200561523438, "logps/rejected": -175.37667846679688, "loss": 4500.3555, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1756608784198761, "rewards/margins": 0.04788801446557045, "rewards/rejected": -0.22354888916015625, "rewards/safe_rewards": -0.1731472909450531, "rewards/unsafe_rewards": -0.15576961636543274, "step": 2210 }, { "epoch": 0.48, "learning_rate": 3.1243437743590544e-07, "logits/chosen": -1.521289587020874, "logits/rejected": -1.6816074848175049, "logps/chosen": -203.87704467773438, "logps/rejected": -174.94528198242188, "loss": 4621.9164, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.16913792490959167, "rewards/margins": 0.0414317324757576, "rewards/rejected": -0.21056966483592987, "rewards/safe_rewards": -0.15870608389377594, "rewards/unsafe_rewards": -0.15993310511112213, "step": 2220 }, { "epoch": 0.48, "learning_rate": 3.106140970016654e-07, "logits/chosen": -1.5114891529083252, "logits/rejected": -1.6278002262115479, "logps/chosen": -196.18832397460938, "logps/rejected": -174.38677978515625, "loss": 4379.1695, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.17566628754138947, "rewards/margins": 0.028811374679207802, "rewards/rejected": -0.20447763800621033, "rewards/safe_rewards": -0.18429887294769287, "rewards/unsafe_rewards": -0.16638454794883728, "step": 2230 }, { "epoch": 0.48, "learning_rate": 3.0879039595959394e-07, "logits/chosen": -1.5058948993682861, "logits/rejected": -1.6589816808700562, "logps/chosen": -206.59127807617188, "logps/rejected": -172.58274841308594, "loss": 4738.9871, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1764165461063385, "rewards/margins": 0.040473513305187225, "rewards/rejected": -0.2168900966644287, "rewards/safe_rewards": -0.18373903632164001, "rewards/unsafe_rewards": -0.1971524953842163, "step": 2240 }, { "epoch": 0.48, "learning_rate": 3.069633772257844e-07, "logits/chosen": -1.4818823337554932, "logits/rejected": -1.682621717453003, "logps/chosen": -206.285888671875, "logps/rejected": -172.87208557128906, "loss": 4481.9488, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.17550985515117645, "rewards/margins": 0.04384458810091019, "rewards/rejected": -0.21935443580150604, "rewards/safe_rewards": -0.18313685059547424, "rewards/unsafe_rewards": -0.1938009262084961, "step": 2250 }, { "epoch": 0.49, "learning_rate": 3.05133143903556e-07, "logits/chosen": -1.53336501121521, "logits/rejected": -1.6590461730957031, "logps/chosen": -201.08425903320312, "logps/rejected": -170.27500915527344, "loss": 4381.3113, "rewards/accuracies": 0.65625, "rewards/chosen": -0.18615621328353882, "rewards/margins": 0.03344009071588516, "rewards/rejected": -0.21959631145000458, "rewards/safe_rewards": -0.20626184344291687, "rewards/unsafe_rewards": -0.20187325775623322, "step": 2260 }, { "epoch": 0.49, "learning_rate": 3.0329979927763525e-07, "logits/chosen": -1.5025571584701538, "logits/rejected": -1.6719276905059814, "logps/chosen": -196.11302185058594, "logps/rejected": -168.12208557128906, "loss": 4472.8641, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.18478217720985413, "rewards/margins": 0.04160640388727188, "rewards/rejected": -0.2263885736465454, "rewards/safe_rewards": -0.1952511966228485, "rewards/unsafe_rewards": -0.18517661094665527, "step": 2270 }, { "epoch": 0.49, "learning_rate": 3.0146344680832757e-07, "logits/chosen": -1.560009241104126, "logits/rejected": -1.7231817245483398, "logps/chosen": -204.5826416015625, "logps/rejected": -171.66639709472656, "loss": 4640.582, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.17331305146217346, "rewards/margins": 0.05153023079037666, "rewards/rejected": -0.22484329342842102, "rewards/safe_rewards": -0.16641171276569366, "rewards/unsafe_rewards": -0.16765719652175903, "step": 2280 }, { "epoch": 0.49, "learning_rate": 2.9962419012567866e-07, "logits/chosen": -1.5279995203018188, "logits/rejected": -1.6566078662872314, "logps/chosen": -196.39601135253906, "logps/rejected": -173.75881958007812, "loss": 4617.0711, "rewards/accuracies": 0.6875, "rewards/chosen": -0.17505872249603271, "rewards/margins": 0.04204284027218819, "rewards/rejected": -0.2171015441417694, "rewards/safe_rewards": -0.185357466340065, "rewards/unsafe_rewards": -0.15385648608207703, "step": 2290 }, { "epoch": 0.49, "learning_rate": 2.977821330236261e-07, "logits/chosen": -1.4917492866516113, "logits/rejected": -1.6159089803695679, "logps/chosen": -203.56375122070312, "logps/rejected": -174.23245239257812, "loss": 4627.9125, "rewards/accuracies": 0.625, "rewards/chosen": -0.18197062611579895, "rewards/margins": 0.02264375612139702, "rewards/rejected": -0.20461437106132507, "rewards/safe_rewards": -0.194724440574646, "rewards/unsafe_rewards": -0.19342705607414246, "step": 2300 }, { "epoch": 0.5, "learning_rate": 2.959373794541426e-07, "logits/chosen": -1.4854226112365723, "logits/rejected": -1.6195329427719116, "logps/chosen": -204.85824584960938, "logps/rejected": -174.81985473632812, "loss": 4765.0461, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.18630453944206238, "rewards/margins": 0.030784716829657555, "rewards/rejected": -0.21708926558494568, "rewards/safe_rewards": -0.17643964290618896, "rewards/unsafe_rewards": -0.1929279863834381, "step": 2310 }, { "epoch": 0.5, "learning_rate": 2.940900335213692e-07, "logits/chosen": -1.5003474950790405, "logits/rejected": -1.637312889099121, "logps/chosen": -198.9324951171875, "logps/rejected": -174.40963745117188, "loss": 4758.4102, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.18736639618873596, "rewards/margins": 0.03362072631716728, "rewards/rejected": -0.22098712623119354, "rewards/safe_rewards": -0.19408169388771057, "rewards/unsafe_rewards": -0.17988963425159454, "step": 2320 }, { "epoch": 0.5, "learning_rate": 2.922401994757407e-07, "logits/chosen": -1.5018656253814697, "logits/rejected": -1.653202772140503, "logps/chosen": -203.24166870117188, "logps/rejected": -172.0989227294922, "loss": 4281.3629, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1799718290567398, "rewards/margins": 0.03870037943124771, "rewards/rejected": -0.2186722308397293, "rewards/safe_rewards": -0.17770013213157654, "rewards/unsafe_rewards": -0.18572965264320374, "step": 2330 }, { "epoch": 0.5, "learning_rate": 2.903879817081025e-07, "logits/chosen": -1.5191972255706787, "logits/rejected": -1.679905891418457, "logps/chosen": -191.8000946044922, "logps/rejected": -165.34469604492188, "loss": 4935.0805, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1955086588859558, "rewards/margins": 0.03485725447535515, "rewards/rejected": -0.23036591708660126, "rewards/safe_rewards": -0.18938510119915009, "rewards/unsafe_rewards": -0.18866722285747528, "step": 2340 }, { "epoch": 0.51, "learning_rate": 2.8853348474381917e-07, "logits/chosen": -1.5546767711639404, "logits/rejected": -1.6739442348480225, "logps/chosen": -205.6017303466797, "logps/rejected": -173.71954345703125, "loss": 4391.2195, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1836847960948944, "rewards/margins": 0.04219796136021614, "rewards/rejected": -0.22588276863098145, "rewards/safe_rewards": -0.1798604130744934, "rewards/unsafe_rewards": -0.18258199095726013, "step": 2350 }, { "epoch": 0.51, "learning_rate": 2.866768132368765e-07, "logits/chosen": -1.5364986658096313, "logits/rejected": -1.6815483570098877, "logps/chosen": -205.49295043945312, "logps/rejected": -180.3798828125, "loss": 4825.6105, "rewards/accuracies": 0.65625, "rewards/chosen": -0.17091530561447144, "rewards/margins": 0.03666374087333679, "rewards/rejected": -0.20757906138896942, "rewards/safe_rewards": -0.17281818389892578, "rewards/unsafe_rewards": -0.16558371484279633, "step": 2360 }, { "epoch": 0.51, "learning_rate": 2.848180719639753e-07, "logits/chosen": -1.4780843257904053, "logits/rejected": -1.683833360671997, "logps/chosen": -206.0137481689453, "logps/rejected": -170.39950561523438, "loss": 4128.6773, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.18460284173488617, "rewards/margins": 0.0413229838013649, "rewards/rejected": -0.22592583298683167, "rewards/safe_rewards": -0.1897450089454651, "rewards/unsafe_rewards": -0.1770438253879547, "step": 2370 }, { "epoch": 0.51, "learning_rate": 2.829573658186182e-07, "logits/chosen": -1.5467725992202759, "logits/rejected": -1.674207091331482, "logps/chosen": -196.6776123046875, "logps/rejected": -167.75575256347656, "loss": 4300.9484, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1986452043056488, "rewards/margins": 0.03822886198759079, "rewards/rejected": -0.2368740737438202, "rewards/safe_rewards": -0.19191065430641174, "rewards/unsafe_rewards": -0.2027779072523117, "step": 2380 }, { "epoch": 0.51, "learning_rate": 2.8109479980519066e-07, "logits/chosen": -1.512613296508789, "logits/rejected": -1.6308845281600952, "logps/chosen": -202.8953857421875, "logps/rejected": -176.2027587890625, "loss": 4324.0746, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18170194327831268, "rewards/margins": 0.03156155347824097, "rewards/rejected": -0.21326351165771484, "rewards/safe_rewards": -0.19627836346626282, "rewards/unsafe_rewards": -0.18031762540340424, "step": 2390 }, { "epoch": 0.52, "learning_rate": 2.792304790330357e-07, "logits/chosen": -1.469745397567749, "logits/rejected": -1.652886986732483, "logps/chosen": -209.7393035888672, "logps/rejected": -175.25770568847656, "loss": 4704.768, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19347238540649414, "rewards/margins": 0.03870789706707001, "rewards/rejected": -0.23218026757240295, "rewards/safe_rewards": -0.19870543479919434, "rewards/unsafe_rewards": -0.20104913413524628, "step": 2400 }, { "epoch": 0.52, "learning_rate": 2.7736450871052124e-07, "logits/chosen": -1.5466697216033936, "logits/rejected": -1.64468514919281, "logps/chosen": -193.91552734375, "logps/rejected": -176.87130737304688, "loss": 4928.6016, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18615570664405823, "rewards/margins": 0.040996454656124115, "rewards/rejected": -0.22715215384960175, "rewards/safe_rewards": -0.18248938024044037, "rewards/unsafe_rewards": -0.1845155954360962, "step": 2410 }, { "epoch": 0.52, "learning_rate": 2.7549699413910383e-07, "logits/chosen": -1.5246613025665283, "logits/rejected": -1.663921594619751, "logps/chosen": -199.07907104492188, "logps/rejected": -169.50804138183594, "loss": 4371.8254, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.19642892479896545, "rewards/margins": 0.037481434643268585, "rewards/rejected": -0.23391035199165344, "rewards/safe_rewards": -0.1954299509525299, "rewards/unsafe_rewards": -0.2012157440185547, "step": 2420 }, { "epoch": 0.52, "learning_rate": 2.736280407073859e-07, "logits/chosen": -1.538309097290039, "logits/rejected": -1.6732009649276733, "logps/chosen": -205.25515747070312, "logps/rejected": -173.90756225585938, "loss": 4653.9961, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.18661274015903473, "rewards/margins": 0.043180909007787704, "rewards/rejected": -0.22979363799095154, "rewards/safe_rewards": -0.17953699827194214, "rewards/unsafe_rewards": -0.18623623251914978, "step": 2430 }, { "epoch": 0.53, "learning_rate": 2.7175775388516827e-07, "logits/chosen": -1.5447183847427368, "logits/rejected": -1.696356177330017, "logps/chosen": -203.76593017578125, "logps/rejected": -174.75234985351562, "loss": 4343.1113, "rewards/accuracies": 0.59375, "rewards/chosen": -0.198257178068161, "rewards/margins": 0.03858915716409683, "rewards/rejected": -0.23684635758399963, "rewards/safe_rewards": -0.20236428081989288, "rewards/unsafe_rewards": -0.2083212435245514, "step": 2440 }, { "epoch": 0.53, "learning_rate": 2.6988623921749864e-07, "logits/chosen": -1.468209981918335, "logits/rejected": -1.676055669784546, "logps/chosen": -203.48355102539062, "logps/rejected": -169.39939880371094, "loss": 4296.5148, "rewards/accuracies": 0.71875, "rewards/chosen": -0.19744744896888733, "rewards/margins": 0.049594052135944366, "rewards/rejected": -0.2470414936542511, "rewards/safe_rewards": -0.18472909927368164, "rewards/unsafe_rewards": -0.1887267529964447, "step": 2450 }, { "epoch": 0.53, "learning_rate": 2.6801360231871496e-07, "logits/chosen": -1.5586713552474976, "logits/rejected": -1.64138662815094, "logps/chosen": -202.47537231445312, "logps/rejected": -177.58998107910156, "loss": 4422.4555, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.19838470220565796, "rewards/margins": 0.03206638619303703, "rewards/rejected": -0.23045110702514648, "rewards/safe_rewards": -0.1923210471868515, "rewards/unsafe_rewards": -0.19813212752342224, "step": 2460 }, { "epoch": 0.53, "learning_rate": 2.661399488664856e-07, "logits/chosen": -1.5509295463562012, "logits/rejected": -1.7298847436904907, "logps/chosen": -201.77125549316406, "logps/rejected": -168.5635223388672, "loss": 4160.593, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18675877153873444, "rewards/margins": 0.045043498277664185, "rewards/rejected": -0.23180226981639862, "rewards/safe_rewards": -0.18707288801670074, "rewards/unsafe_rewards": -0.18880777060985565, "step": 2470 }, { "epoch": 0.53, "learning_rate": 2.6426538459584565e-07, "logits/chosen": -1.5045541524887085, "logits/rejected": -1.7031694650650024, "logps/chosen": -207.01443481445312, "logps/rejected": -177.2470245361328, "loss": 4485.8734, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1926576793193817, "rewards/margins": 0.03662164881825447, "rewards/rejected": -0.22927935421466827, "rewards/safe_rewards": -0.18262812495231628, "rewards/unsafe_rewards": -0.1790202409029007, "step": 2480 }, { "epoch": 0.54, "learning_rate": 2.6239001529322995e-07, "logits/chosen": -1.5201712846755981, "logits/rejected": -1.6971031427383423, "logps/chosen": -197.39907836914062, "logps/rejected": -174.24420166015625, "loss": 4400.4852, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1895659863948822, "rewards/margins": 0.039253491908311844, "rewards/rejected": -0.22881948947906494, "rewards/safe_rewards": -0.18520265817642212, "rewards/unsafe_rewards": -0.18350733816623688, "step": 2490 }, { "epoch": 0.54, "learning_rate": 2.6051394679050336e-07, "logits/chosen": -1.5451513528823853, "logits/rejected": -1.6884424686431885, "logps/chosen": -194.12803649902344, "logps/rejected": -168.74598693847656, "loss": 4747.273, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.19713914394378662, "rewards/margins": 0.03740449249744415, "rewards/rejected": -0.2345436066389084, "rewards/safe_rewards": -0.17802785336971283, "rewards/unsafe_rewards": -0.18939965963363647, "step": 2500 }, { "epoch": 0.54, "eval_logits/chosen": -1.5693013668060303, "eval_logits/rejected": -1.7166366577148438, "eval_logps/chosen": -199.5948028564453, "eval_logps/rejected": -171.5613555908203, "eval_loss": 4701.76513671875, "eval_rewards/accuracies": 0.6415778994560242, "eval_rewards/chosen": -0.19778746366500854, "eval_rewards/margins": 0.03435870260000229, "eval_rewards/rejected": -0.23214618861675262, "eval_rewards/safe_rewards": -0.19601015746593475, "eval_rewards/unsafe_rewards": -0.19620607793331146, "eval_runtime": 1039.3559, "eval_samples_per_second": 31.793, "eval_steps_per_second": 0.994, "step": 2500 }, { "epoch": 0.54, "learning_rate": 2.5863728495898845e-07, "logits/chosen": -1.5504822731018066, "logits/rejected": -1.707065224647522, "logps/chosen": -196.2899169921875, "logps/rejected": -170.51158142089844, "loss": 4211.4773, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.19635534286499023, "rewards/margins": 0.044305469840765, "rewards/rejected": -0.24066083133220673, "rewards/safe_rewards": -0.1875770092010498, "rewards/unsafe_rewards": -0.19095739722251892, "step": 2510 }, { "epoch": 0.54, "learning_rate": 2.567601357034908e-07, "logits/chosen": -1.5605950355529785, "logits/rejected": -1.7132766246795654, "logps/chosen": -191.8726348876953, "logps/rejected": -166.2849578857422, "loss": 4459.2312, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1965082883834839, "rewards/margins": 0.041414372622966766, "rewards/rejected": -0.23792266845703125, "rewards/safe_rewards": -0.19717779755592346, "rewards/unsafe_rewards": -0.1969546526670456, "step": 2520 }, { "epoch": 0.54, "learning_rate": 2.5488260495632247e-07, "logits/chosen": -1.531693458557129, "logits/rejected": -1.685492753982544, "logps/chosen": -200.07876586914062, "logps/rejected": -169.68914794921875, "loss": 4156.95, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.18730352818965912, "rewards/margins": 0.04719041287899017, "rewards/rejected": -0.2344939410686493, "rewards/safe_rewards": -0.19574880599975586, "rewards/unsafe_rewards": -0.18330325186252594, "step": 2530 }, { "epoch": 0.55, "learning_rate": 2.5300479867132426e-07, "logits/chosen": -1.577541470527649, "logits/rejected": -1.6680841445922852, "logps/chosen": -201.19552612304688, "logps/rejected": -184.07559204101562, "loss": 4363.3023, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1980850249528885, "rewards/margins": 0.032196685671806335, "rewards/rejected": -0.23028171062469482, "rewards/safe_rewards": -0.2000546008348465, "rewards/unsafe_rewards": -0.18802449107170105, "step": 2540 }, { "epoch": 0.55, "learning_rate": 2.51126822817886e-07, "logits/chosen": -1.5106581449508667, "logits/rejected": -1.67984938621521, "logps/chosen": -192.710693359375, "logps/rejected": -166.26303100585938, "loss": 4530.7984, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1906513273715973, "rewards/margins": 0.04666837677359581, "rewards/rejected": -0.2373197078704834, "rewards/safe_rewards": -0.19890260696411133, "rewards/unsafe_rewards": -0.18071317672729492, "step": 2550 }, { "epoch": 0.55, "learning_rate": 2.4924878337496705e-07, "logits/chosen": -1.568508505821228, "logits/rejected": -1.729461908340454, "logps/chosen": -199.64892578125, "logps/rejected": -169.7209930419922, "loss": 4493.65, "rewards/accuracies": 0.6875, "rewards/chosen": -0.19782809913158417, "rewards/margins": 0.04611339047551155, "rewards/rejected": -0.24394147098064423, "rewards/safe_rewards": -0.1780800074338913, "rewards/unsafe_rewards": -0.18747267127037048, "step": 2560 }, { "epoch": 0.55, "learning_rate": 2.4737078632511503e-07, "logits/chosen": -1.5839685201644897, "logits/rejected": -1.669395089149475, "logps/chosen": -192.79550170898438, "logps/rejected": -170.79786682128906, "loss": 4712.85, "rewards/accuracies": 0.6875, "rewards/chosen": -0.197386234998703, "rewards/margins": 0.05102890729904175, "rewards/rejected": -0.24841514229774475, "rewards/safe_rewards": -0.19670304656028748, "rewards/unsafe_rewards": -0.18058760464191437, "step": 2570 }, { "epoch": 0.56, "learning_rate": 2.454929376484852e-07, "logits/chosen": -1.5018171072006226, "logits/rejected": -1.6697053909301758, "logps/chosen": -202.87326049804688, "logps/rejected": -167.7611083984375, "loss": 4511.968, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17741093039512634, "rewards/margins": 0.049287956207990646, "rewards/rejected": -0.2266988754272461, "rewards/safe_rewards": -0.18110810220241547, "rewards/unsafe_rewards": -0.16810335218906403, "step": 2580 }, { "epoch": 0.56, "learning_rate": 2.4361534331686e-07, "logits/chosen": -1.500495433807373, "logits/rejected": -1.679686188697815, "logps/chosen": -205.57327270507812, "logps/rejected": -169.31495666503906, "loss": 4184.5203, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2147361934185028, "rewards/margins": 0.03484708070755005, "rewards/rejected": -0.24958328902721405, "rewards/safe_rewards": -0.22017931938171387, "rewards/unsafe_rewards": -0.21876350045204163, "step": 2590 }, { "epoch": 0.56, "learning_rate": 2.4173810928766823e-07, "logits/chosen": -1.5288746356964111, "logits/rejected": -1.6827061176300049, "logps/chosen": -201.63180541992188, "logps/rejected": -170.29649353027344, "loss": 4650.9617, "rewards/accuracies": 0.59375, "rewards/chosen": -0.2082614004611969, "rewards/margins": 0.029747510328888893, "rewards/rejected": -0.23800890147686005, "rewards/safe_rewards": -0.20588140189647675, "rewards/unsafe_rewards": -0.20917364954948425, "step": 2600 }, { "epoch": 0.56, "learning_rate": 2.39861341498006e-07, "logits/chosen": -1.5730488300323486, "logits/rejected": -1.6724504232406616, "logps/chosen": -195.88851928710938, "logps/rejected": -173.4611053466797, "loss": 4299.2023, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20688121020793915, "rewards/margins": 0.02081337198615074, "rewards/rejected": -0.2276945859193802, "rewards/safe_rewards": -0.19698533415794373, "rewards/unsafe_rewards": -0.1980433166027069, "step": 2610 }, { "epoch": 0.56, "learning_rate": 2.3798514585865852e-07, "logits/chosen": -1.5170954465866089, "logits/rejected": -1.685490608215332, "logps/chosen": -206.40353393554688, "logps/rejected": -172.07530212402344, "loss": 4366.8742, "rewards/accuracies": 0.625, "rewards/chosen": -0.1971440613269806, "rewards/margins": 0.0329994335770607, "rewards/rejected": -0.2301435023546219, "rewards/safe_rewards": -0.18942363560199738, "rewards/unsafe_rewards": -0.18942859768867493, "step": 2620 }, { "epoch": 0.57, "learning_rate": 2.3610962824812275e-07, "logits/chosen": -1.5789998769760132, "logits/rejected": -1.712479591369629, "logps/chosen": -197.58877563476562, "logps/rejected": -171.1901092529297, "loss": 4551.8602, "rewards/accuracies": 0.65625, "rewards/chosen": -0.20181520283222198, "rewards/margins": 0.040505390614271164, "rewards/rejected": -0.24232058227062225, "rewards/safe_rewards": -0.21084916591644287, "rewards/unsafe_rewards": -0.1856330782175064, "step": 2630 }, { "epoch": 0.57, "learning_rate": 2.3423489450663306e-07, "logits/chosen": -1.569207787513733, "logits/rejected": -1.6886190176010132, "logps/chosen": -203.07049560546875, "logps/rejected": -174.48367309570312, "loss": 4167.5516, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21627740561962128, "rewards/margins": 0.033327799290418625, "rewards/rejected": -0.2496052235364914, "rewards/safe_rewards": -0.23331061005592346, "rewards/unsafe_rewards": -0.22725176811218262, "step": 2640 }, { "epoch": 0.57, "learning_rate": 2.3236105043018812e-07, "logits/chosen": -1.5509974956512451, "logits/rejected": -1.6693681478500366, "logps/chosen": -191.91709899902344, "logps/rejected": -175.28521728515625, "loss": 4689.0336, "rewards/accuracies": 0.59375, "rewards/chosen": -0.20064015686511993, "rewards/margins": 0.03519872575998306, "rewards/rejected": -0.2358388602733612, "rewards/safe_rewards": -0.18812422454357147, "rewards/unsafe_rewards": -0.19629411399364471, "step": 2650 }, { "epoch": 0.57, "learning_rate": 2.3048820176458015e-07, "logits/chosen": -1.577636480331421, "logits/rejected": -1.6877830028533936, "logps/chosen": -200.65087890625, "logps/rejected": -171.69635009765625, "loss": 4291.4758, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.207768514752388, "rewards/margins": 0.025345012545585632, "rewards/rejected": -0.23311349749565125, "rewards/safe_rewards": -0.19822929799556732, "rewards/unsafe_rewards": -0.20675000548362732, "step": 2660 }, { "epoch": 0.57, "learning_rate": 2.2861645419942832e-07, "logits/chosen": -1.5650993585586548, "logits/rejected": -1.6813987493515015, "logps/chosen": -199.05734252929688, "logps/rejected": -175.79104614257812, "loss": 4668.7992, "rewards/accuracies": 0.625, "rewards/chosen": -0.18966099619865417, "rewards/margins": 0.042241401970386505, "rewards/rejected": -0.23190239071846008, "rewards/safe_rewards": -0.19673866033554077, "rewards/unsafe_rewards": -0.18796208500862122, "step": 2670 }, { "epoch": 0.58, "learning_rate": 2.2674591336221338e-07, "logits/chosen": -1.5107767581939697, "logits/rejected": -1.7127727270126343, "logps/chosen": -203.00538635253906, "logps/rejected": -171.3531036376953, "loss": 4500.359, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.18632449209690094, "rewards/margins": 0.05514819547533989, "rewards/rejected": -0.24147269129753113, "rewards/safe_rewards": -0.1903878152370453, "rewards/unsafe_rewards": -0.17838212847709656, "step": 2680 }, { "epoch": 0.58, "learning_rate": 2.2487668481231783e-07, "logits/chosen": -1.5248405933380127, "logits/rejected": -1.6705234050750732, "logps/chosen": -205.28720092773438, "logps/rejected": -170.92933654785156, "loss": 4489.0711, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.19671492278575897, "rewards/margins": 0.04327961429953575, "rewards/rejected": -0.23999452590942383, "rewards/safe_rewards": -0.20456400513648987, "rewards/unsafe_rewards": -0.19612132012844086, "step": 2690 }, { "epoch": 0.58, "learning_rate": 2.2300887403506804e-07, "logits/chosen": -1.517734169960022, "logits/rejected": -1.6909061670303345, "logps/chosen": -207.2577362060547, "logps/rejected": -173.4998016357422, "loss": 4760.8734, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.19193896651268005, "rewards/margins": 0.03682449460029602, "rewards/rejected": -0.22876346111297607, "rewards/safe_rewards": -0.18885107338428497, "rewards/unsafe_rewards": -0.19988921284675598, "step": 2700 }, { "epoch": 0.58, "learning_rate": 2.2114258643578216e-07, "logits/chosen": -1.510195255279541, "logits/rejected": -1.6849644184112549, "logps/chosen": -201.96487426757812, "logps/rejected": -172.35162353515625, "loss": 4265.4918, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2018086463212967, "rewards/margins": 0.029315704479813576, "rewards/rejected": -0.23112432658672333, "rewards/safe_rewards": -0.20301690697669983, "rewards/unsafe_rewards": -0.19221507012844086, "step": 2710 }, { "epoch": 0.59, "learning_rate": 2.192779273338215e-07, "logits/chosen": -1.5415281057357788, "logits/rejected": -1.7104946374893188, "logps/chosen": -206.13900756835938, "logps/rejected": -178.76107788085938, "loss": 4446.4203, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19603845477104187, "rewards/margins": 0.038541924208402634, "rewards/rejected": -0.234580397605896, "rewards/safe_rewards": -0.1911752074956894, "rewards/unsafe_rewards": -0.19572117924690247, "step": 2720 }, { "epoch": 0.59, "learning_rate": 2.1741500195664687e-07, "logits/chosen": -1.5440678596496582, "logits/rejected": -1.6898181438446045, "logps/chosen": -202.35635375976562, "logps/rejected": -173.29444885253906, "loss": 4597.3602, "rewards/accuracies": 0.625, "rewards/chosen": -0.19338266551494598, "rewards/margins": 0.03302343934774399, "rewards/rejected": -0.22640609741210938, "rewards/safe_rewards": -0.1989518702030182, "rewards/unsafe_rewards": -0.1950412541627884, "step": 2730 }, { "epoch": 0.59, "learning_rate": 2.155539154338809e-07, "logits/chosen": -1.5684623718261719, "logits/rejected": -1.7206283807754517, "logps/chosen": -185.26626586914062, "logps/rejected": -166.20046997070312, "loss": 4428.5258, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.21878042817115784, "rewards/margins": 0.01888570375740528, "rewards/rejected": -0.23766613006591797, "rewards/safe_rewards": -0.2025218904018402, "rewards/unsafe_rewards": -0.22718489170074463, "step": 2740 }, { "epoch": 0.59, "learning_rate": 2.1369477279137465e-07, "logits/chosen": -1.53014075756073, "logits/rejected": -1.656298041343689, "logps/chosen": -208.85342407226562, "logps/rejected": -178.5425567626953, "loss": 4536.2121, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.205061674118042, "rewards/margins": 0.028847072273492813, "rewards/rejected": -0.2339087426662445, "rewards/safe_rewards": -0.19740554690361023, "rewards/unsafe_rewards": -0.21920163929462433, "step": 2750 }, { "epoch": 0.59, "learning_rate": 2.1183767894528135e-07, "logits/chosen": -1.5086443424224854, "logits/rejected": -1.6301743984222412, "logps/chosen": -201.7202606201172, "logps/rejected": -179.6082000732422, "loss": 4300.3879, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.19766704738140106, "rewards/margins": 0.03496861457824707, "rewards/rejected": -0.23263569176197052, "rewards/safe_rewards": -0.2009933739900589, "rewards/unsafe_rewards": -0.17853900790214539, "step": 2760 }, { "epoch": 0.6, "learning_rate": 2.0998273869613544e-07, "logits/chosen": -1.4874091148376465, "logits/rejected": -1.6401450634002686, "logps/chosen": -197.0526123046875, "logps/rejected": -172.248046875, "loss": 4384.9141, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.20032823085784912, "rewards/margins": 0.02527112327516079, "rewards/rejected": -0.22559933364391327, "rewards/safe_rewards": -0.2060876339673996, "rewards/unsafe_rewards": -0.18664494156837463, "step": 2770 }, { "epoch": 0.6, "learning_rate": 2.0813005672293808e-07, "logits/chosen": -1.5299351215362549, "logits/rejected": -1.6839040517807007, "logps/chosen": -198.33592224121094, "logps/rejected": -169.84927368164062, "loss": 4998.1547, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.210770845413208, "rewards/margins": 0.03698652610182762, "rewards/rejected": -0.24775739014148712, "rewards/safe_rewards": -0.19847488403320312, "rewards/unsafe_rewards": -0.2026103436946869, "step": 2780 }, { "epoch": 0.6, "learning_rate": 2.0627973757725054e-07, "logits/chosen": -1.5782445669174194, "logits/rejected": -1.6961065530776978, "logps/chosen": -192.30172729492188, "logps/rejected": -172.23095703125, "loss": 4562.7812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20980997383594513, "rewards/margins": 0.032659418880939484, "rewards/rejected": -0.2424694001674652, "rewards/safe_rewards": -0.2209273874759674, "rewards/unsafe_rewards": -0.20981907844543457, "step": 2790 }, { "epoch": 0.6, "learning_rate": 2.0443188567729347e-07, "logits/chosen": -1.551776647567749, "logits/rejected": -1.7003543376922607, "logps/chosen": -203.3460693359375, "logps/rejected": -174.6782684326172, "loss": 4919.6008, "rewards/accuracies": 0.6875, "rewards/chosen": -0.20233190059661865, "rewards/margins": 0.03578402101993561, "rewards/rejected": -0.23811593651771545, "rewards/safe_rewards": -0.19567596912384033, "rewards/unsafe_rewards": -0.21053609251976013, "step": 2800 }, { "epoch": 0.6, "learning_rate": 2.0258660530205463e-07, "logits/chosen": -1.503344178199768, "logits/rejected": -1.6933937072753906, "logps/chosen": -212.1704559326172, "logps/rejected": -174.46710205078125, "loss": 4296.932, "rewards/accuracies": 0.65625, "rewards/chosen": -0.19372889399528503, "rewards/margins": 0.04554586485028267, "rewards/rejected": -0.2392747700214386, "rewards/safe_rewards": -0.19337131083011627, "rewards/unsafe_rewards": -0.1928991675376892, "step": 2810 }, { "epoch": 0.61, "learning_rate": 2.0074400058540418e-07, "logits/chosen": -1.5557641983032227, "logits/rejected": -1.7213455438613892, "logps/chosen": -193.20849609375, "logps/rejected": -167.53622436523438, "loss": 4455.6094, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.20493879914283752, "rewards/margins": 0.03959424048662186, "rewards/rejected": -0.24453303217887878, "rewards/safe_rewards": -0.19709384441375732, "rewards/unsafe_rewards": -0.1989717185497284, "step": 2820 }, { "epoch": 0.61, "learning_rate": 1.9890417551021794e-07, "logits/chosen": -1.4782822132110596, "logits/rejected": -1.6415374279022217, "logps/chosen": -205.72366333007812, "logps/rejected": -175.6099395751953, "loss": 4565.0437, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.19023554027080536, "rewards/margins": 0.03917142003774643, "rewards/rejected": -0.22940698266029358, "rewards/safe_rewards": -0.17931707203388214, "rewards/unsafe_rewards": -0.2010422945022583, "step": 2830 }, { "epoch": 0.61, "learning_rate": 1.9706723390250955e-07, "logits/chosen": -1.5453864336013794, "logits/rejected": -1.721928596496582, "logps/chosen": -198.40701293945312, "logps/rejected": -168.09515380859375, "loss": 4364.6301, "rewards/accuracies": 0.65625, "rewards/chosen": -0.18271973729133606, "rewards/margins": 0.04703155532479286, "rewards/rejected": -0.22975127398967743, "rewards/safe_rewards": -0.1981278508901596, "rewards/unsafe_rewards": -0.194563627243042, "step": 2840 }, { "epoch": 0.61, "learning_rate": 1.9523327942557116e-07, "logits/chosen": -1.5288199186325073, "logits/rejected": -1.7086979150772095, "logps/chosen": -203.77769470214844, "logps/rejected": -174.5388641357422, "loss": 4391.4937, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.18563532829284668, "rewards/margins": 0.0690595954656601, "rewards/rejected": -0.2546949088573456, "rewards/safe_rewards": -0.19328105449676514, "rewards/unsafe_rewards": -0.19125904142856598, "step": 2850 }, { "epoch": 0.62, "learning_rate": 1.934024155741237e-07, "logits/chosen": -1.5432475805282593, "logits/rejected": -1.7038675546646118, "logps/chosen": -198.57333374023438, "logps/rejected": -171.006591796875, "loss": 4887.0398, "rewards/accuracies": 0.625, "rewards/chosen": -0.19471348822116852, "rewards/margins": 0.03395069018006325, "rewards/rejected": -0.22866418957710266, "rewards/safe_rewards": -0.20764963328838348, "rewards/unsafe_rewards": -0.19211608171463013, "step": 2860 }, { "epoch": 0.62, "learning_rate": 1.9157474566847593e-07, "logits/chosen": -1.5633355379104614, "logits/rejected": -1.689415693283081, "logps/chosen": -205.8857421875, "logps/rejected": -174.55288696289062, "loss": 4720.6742, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.19619080424308777, "rewards/margins": 0.02893402799963951, "rewards/rejected": -0.22512483596801758, "rewards/safe_rewards": -0.19640958309173584, "rewards/unsafe_rewards": -0.20611879229545593, "step": 2870 }, { "epoch": 0.62, "learning_rate": 1.8975037284869442e-07, "logits/chosen": -1.4951775074005127, "logits/rejected": -1.6283502578735352, "logps/chosen": -203.83485412597656, "logps/rejected": -174.9644012451172, "loss": 4463.8617, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.191941499710083, "rewards/margins": 0.03664858266711235, "rewards/rejected": -0.22859008610248566, "rewards/safe_rewards": -0.18621447682380676, "rewards/unsafe_rewards": -0.19047412276268005, "step": 2880 }, { "epoch": 0.62, "learning_rate": 1.879294000687827e-07, "logits/chosen": -1.513442873954773, "logits/rejected": -1.633811593055725, "logps/chosen": -202.70785522460938, "logps/rejected": -174.74984741210938, "loss": 4755.8008, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.20467662811279297, "rewards/margins": 0.029195329174399376, "rewards/rejected": -0.2338719666004181, "rewards/safe_rewards": -0.18860822916030884, "rewards/unsafe_rewards": -0.20826514065265656, "step": 2890 }, { "epoch": 0.62, "learning_rate": 1.8611193009087129e-07, "logits/chosen": -1.5140800476074219, "logits/rejected": -1.6556625366210938, "logps/chosen": -203.41671752929688, "logps/rejected": -176.00640869140625, "loss": 4468.8762, "rewards/accuracies": 0.6875, "rewards/chosen": -0.201602965593338, "rewards/margins": 0.04034573212265968, "rewards/rejected": -0.2419486939907074, "rewards/safe_rewards": -0.1966879963874817, "rewards/unsafe_rewards": -0.21145284175872803, "step": 2900 }, { "epoch": 0.63, "learning_rate": 1.842980654794188e-07, "logits/chosen": -1.5408556461334229, "logits/rejected": -1.6424930095672607, "logps/chosen": -198.3621826171875, "logps/rejected": -176.60447692871094, "loss": 4452.3328, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20448172092437744, "rewards/margins": 0.020542804151773453, "rewards/rejected": -0.2250245064496994, "rewards/safe_rewards": -0.19730757176876068, "rewards/unsafe_rewards": -0.19025036692619324, "step": 2910 }, { "epoch": 0.63, "learning_rate": 1.8248790859542366e-07, "logits/chosen": -1.5731232166290283, "logits/rejected": -1.7111440896987915, "logps/chosen": -190.44332885742188, "logps/rejected": -169.7258758544922, "loss": 4330.9441, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1995454579591751, "rewards/margins": 0.03663283586502075, "rewards/rejected": -0.23617830872535706, "rewards/safe_rewards": -0.2007269561290741, "rewards/unsafe_rewards": -0.18620434403419495, "step": 2920 }, { "epoch": 0.63, "learning_rate": 1.8068156159064798e-07, "logits/chosen": -1.5437380075454712, "logits/rejected": -1.6589109897613525, "logps/chosen": -202.52059936523438, "logps/rejected": -182.74819946289062, "loss": 4386.3656, "rewards/accuracies": 0.65625, "rewards/chosen": -0.19854086637496948, "rewards/margins": 0.0522574707865715, "rewards/rejected": -0.2507983446121216, "rewards/safe_rewards": -0.19896040856838226, "rewards/unsafe_rewards": -0.20799390971660614, "step": 2930 }, { "epoch": 0.63, "learning_rate": 1.7887912640185276e-07, "logits/chosen": -1.5307490825653076, "logits/rejected": -1.7054367065429688, "logps/chosen": -207.37185668945312, "logps/rejected": -173.26937866210938, "loss": 4460.2996, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.19062159955501556, "rewards/margins": 0.052648067474365234, "rewards/rejected": -0.243269681930542, "rewards/safe_rewards": -0.20524027943611145, "rewards/unsafe_rewards": -0.19675911962985992, "step": 2940 }, { "epoch": 0.63, "learning_rate": 1.770807047450449e-07, "logits/chosen": -1.5111279487609863, "logits/rejected": -1.627478837966919, "logps/chosen": -199.1936798095703, "logps/rejected": -175.44798278808594, "loss": 4237.0391, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20405860245227814, "rewards/margins": 0.03607747703790665, "rewards/rejected": -0.24013610184192657, "rewards/safe_rewards": -0.21334998309612274, "rewards/unsafe_rewards": -0.20128795504570007, "step": 2950 }, { "epoch": 0.64, "learning_rate": 1.752863981097379e-07, "logits/chosen": -1.52851402759552, "logits/rejected": -1.6503387689590454, "logps/chosen": -200.77450561523438, "logps/rejected": -171.7937774658203, "loss": 4430.6324, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.20042037963867188, "rewards/margins": 0.033999282866716385, "rewards/rejected": -0.23441962897777557, "rewards/safe_rewards": -0.19610650837421417, "rewards/unsafe_rewards": -0.2062242478132248, "step": 2960 }, { "epoch": 0.64, "learning_rate": 1.7349630775322366e-07, "logits/chosen": -1.5662755966186523, "logits/rejected": -1.7444403171539307, "logps/chosen": -197.9523162841797, "logps/rejected": -163.52651977539062, "loss": 4648.5223, "rewards/accuracies": 0.65625, "rewards/chosen": -0.20786547660827637, "rewards/margins": 0.05544137954711914, "rewards/rejected": -0.2633068561553955, "rewards/safe_rewards": -0.20542070269584656, "rewards/unsafe_rewards": -0.2149234265089035, "step": 2970 }, { "epoch": 0.64, "learning_rate": 1.717105346948592e-07, "logits/chosen": -1.607993483543396, "logits/rejected": -1.714992880821228, "logps/chosen": -191.37318420410156, "logps/rejected": -172.05540466308594, "loss": 4327.6477, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2075084149837494, "rewards/margins": 0.029152993112802505, "rewards/rejected": -0.23666143417358398, "rewards/safe_rewards": -0.2096833884716034, "rewards/unsafe_rewards": -0.23288047313690186, "step": 2980 }, { "epoch": 0.64, "learning_rate": 1.699291797103652e-07, "logits/chosen": -1.563627004623413, "logits/rejected": -1.6982638835906982, "logps/chosen": -197.26333618164062, "logps/rejected": -170.3524627685547, "loss": 4751.0848, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.19608674943447113, "rewards/margins": 0.04259530454874039, "rewards/rejected": -0.2386820763349533, "rewards/safe_rewards": -0.2007298469543457, "rewards/unsafe_rewards": -0.20902566611766815, "step": 2990 }, { "epoch": 0.65, "learning_rate": 1.6815234332613898e-07, "logits/chosen": -1.4947175979614258, "logits/rejected": -1.667734146118164, "logps/chosen": -198.56466674804688, "logps/rejected": -170.52401733398438, "loss": 4464.0027, "rewards/accuracies": 0.65625, "rewards/chosen": -0.19445013999938965, "rewards/margins": 0.04136034846305847, "rewards/rejected": -0.23581048846244812, "rewards/safe_rewards": -0.19851234555244446, "rewards/unsafe_rewards": -0.18935590982437134, "step": 3000 }, { "epoch": 0.65, "eval_logits/chosen": -1.5767643451690674, "eval_logits/rejected": -1.7240060567855835, "eval_logps/chosen": -200.4293975830078, "eval_logps/rejected": -172.457763671875, "eval_loss": 4681.61669921875, "eval_rewards/accuracies": 0.6356486082077026, "eval_rewards/chosen": -0.20613320171833038, "eval_rewards/margins": 0.03497704491019249, "eval_rewards/rejected": -0.24111022055149078, "eval_rewards/safe_rewards": -0.20410189032554626, "eval_rewards/unsafe_rewards": -0.20427952706813812, "eval_runtime": 1092.3448, "eval_samples_per_second": 30.251, "eval_steps_per_second": 0.946, "step": 3000 }, { "epoch": 0.65, "learning_rate": 1.6638012581358218e-07, "logits/chosen": -1.5389224290847778, "logits/rejected": -1.6999642848968506, "logps/chosen": -204.77188110351562, "logps/rejected": -171.12969970703125, "loss": 4287.3328, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1947934776544571, "rewards/margins": 0.04912746697664261, "rewards/rejected": -0.2439209520816803, "rewards/safe_rewards": -0.1799081414937973, "rewards/unsafe_rewards": -0.2076074630022049, "step": 3010 }, { "epoch": 0.65, "learning_rate": 1.6461262718344133e-07, "logits/chosen": -1.5812143087387085, "logits/rejected": -1.7010109424591064, "logps/chosen": -195.73025512695312, "logps/rejected": -172.62442016601562, "loss": 4704.8164, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19697165489196777, "rewards/margins": 0.04454663023352623, "rewards/rejected": -0.2415182888507843, "rewards/safe_rewards": -0.19633518159389496, "rewards/unsafe_rewards": -0.20700526237487793, "step": 3020 }, { "epoch": 0.65, "learning_rate": 1.6284994718016465e-07, "logits/chosen": -1.5657060146331787, "logits/rejected": -1.7048152685165405, "logps/chosen": -195.546630859375, "logps/rejected": -169.56910705566406, "loss": 4379.8633, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.19721569120883942, "rewards/margins": 0.04133676737546921, "rewards/rejected": -0.23855248093605042, "rewards/safe_rewards": -0.19170241057872772, "rewards/unsafe_rewards": -0.19804659485816956, "step": 3030 }, { "epoch": 0.65, "learning_rate": 1.6109218527627306e-07, "logits/chosen": -1.5368934869766235, "logits/rejected": -1.6984144449234009, "logps/chosen": -208.07418823242188, "logps/rejected": -176.7255859375, "loss": 4659.4094, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2019340991973877, "rewards/margins": 0.03176935762166977, "rewards/rejected": -0.23370344936847687, "rewards/safe_rewards": -0.20242567360401154, "rewards/unsafe_rewards": -0.1958557665348053, "step": 3040 }, { "epoch": 0.66, "learning_rate": 1.5933944066674622e-07, "logits/chosen": -1.5390002727508545, "logits/rejected": -1.6704362630844116, "logps/chosen": -206.99453735351562, "logps/rejected": -172.66021728515625, "loss": 4278.8406, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2147243469953537, "rewards/margins": 0.036833278834819794, "rewards/rejected": -0.2515576183795929, "rewards/safe_rewards": -0.20041997730731964, "rewards/unsafe_rewards": -0.18982048332691193, "step": 3050 }, { "epoch": 0.66, "learning_rate": 1.5759181226342553e-07, "logits/chosen": -1.5766212940216064, "logits/rejected": -1.697432279586792, "logps/chosen": -199.2848663330078, "logps/rejected": -177.80130004882812, "loss": 4499.6531, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1989654302597046, "rewards/margins": 0.04014367610216141, "rewards/rejected": -0.2391091138124466, "rewards/safe_rewards": -0.21582308411598206, "rewards/unsafe_rewards": -0.20545117557048798, "step": 3060 }, { "epoch": 0.66, "learning_rate": 1.5584939868943158e-07, "logits/chosen": -1.550281047821045, "logits/rejected": -1.6750667095184326, "logps/chosen": -201.0919647216797, "logps/rejected": -171.35665893554688, "loss": 4558.2668, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.19992206990718842, "rewards/margins": 0.04724174737930298, "rewards/rejected": -0.2471638023853302, "rewards/safe_rewards": -0.19373928010463715, "rewards/unsafe_rewards": -0.19965949654579163, "step": 3070 }, { "epoch": 0.66, "learning_rate": 1.5411229827359894e-07, "logits/chosen": -1.557908296585083, "logits/rejected": -1.6652119159698486, "logps/chosen": -194.4299774169922, "logps/rejected": -175.22972106933594, "loss": 4579.3074, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2175595760345459, "rewards/margins": 0.019637059420347214, "rewards/rejected": -0.2371966391801834, "rewards/safe_rewards": -0.22172220051288605, "rewards/unsafe_rewards": -0.20907466113567352, "step": 3080 }, { "epoch": 0.66, "learning_rate": 1.5238060904492716e-07, "logits/chosen": -1.5032063722610474, "logits/rejected": -1.6561492681503296, "logps/chosen": -211.14730834960938, "logps/rejected": -177.60861206054688, "loss": 4675.5777, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19702103734016418, "rewards/margins": 0.030159270390868187, "rewards/rejected": -0.2271803319454193, "rewards/safe_rewards": -0.20415782928466797, "rewards/unsafe_rewards": -0.19745634496212006, "step": 3090 }, { "epoch": 0.67, "learning_rate": 1.506544287270487e-07, "logits/chosen": -1.52834951877594, "logits/rejected": -1.7070010900497437, "logps/chosen": -200.551513671875, "logps/rejected": -171.8428497314453, "loss": 4224.1754, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.19622871279716492, "rewards/margins": 0.04440726712346077, "rewards/rejected": -0.240635946393013, "rewards/safe_rewards": -0.19283874332904816, "rewards/unsafe_rewards": -0.20227304100990295, "step": 3100 }, { "epoch": 0.67, "learning_rate": 1.4893385473271413e-07, "logits/chosen": -1.5810573101043701, "logits/rejected": -1.7245237827301025, "logps/chosen": -188.88137817382812, "logps/rejected": -170.65615844726562, "loss": 4566.0258, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.21648144721984863, "rewards/margins": 0.03265110403299332, "rewards/rejected": -0.24913254380226135, "rewards/safe_rewards": -0.20510610938072205, "rewards/unsafe_rewards": -0.20620611310005188, "step": 3110 }, { "epoch": 0.67, "learning_rate": 1.4721898415829493e-07, "logits/chosen": -1.5554697513580322, "logits/rejected": -1.7137749195098877, "logps/chosen": -198.98330688476562, "logps/rejected": -169.91387939453125, "loss": 4549.0047, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.21290090680122375, "rewards/margins": 0.030341366305947304, "rewards/rejected": -0.2432422935962677, "rewards/safe_rewards": -0.21039363741874695, "rewards/unsafe_rewards": -0.2101362645626068, "step": 3120 }, { "epoch": 0.67, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -1.5757358074188232, "logits/rejected": -1.730033278465271, "logps/chosen": -206.6368408203125, "logps/rejected": -180.8277587890625, "loss": 4321.5664, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2012835294008255, "rewards/margins": 0.04758080840110779, "rewards/rejected": -0.2488643229007721, "rewards/safe_rewards": -0.19152560830116272, "rewards/unsafe_rewards": -0.20465119183063507, "step": 3130 }, { "epoch": 0.68, "learning_rate": 1.4380674003993498e-07, "logits/chosen": -1.4896045923233032, "logits/rejected": -1.6357723474502563, "logps/chosen": -209.26602172851562, "logps/rejected": -173.13046264648438, "loss": 4907.3379, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.20854011178016663, "rewards/margins": 0.04477395862340927, "rewards/rejected": -0.2533140778541565, "rewards/safe_rewards": -0.21549777686595917, "rewards/unsafe_rewards": -0.2249515801668167, "step": 3140 }, { "epoch": 0.68, "learning_rate": 1.4210955905761807e-07, "logits/chosen": -1.5735523700714111, "logits/rejected": -1.7388004064559937, "logps/chosen": -202.9924774169922, "logps/rejected": -169.76327514648438, "loss": 4608.409, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19960299134254456, "rewards/margins": 0.05421798676252365, "rewards/rejected": -0.2538209855556488, "rewards/safe_rewards": -0.1971103399991989, "rewards/unsafe_rewards": -0.2079833745956421, "step": 3150 }, { "epoch": 0.68, "learning_rate": 1.404184666075978e-07, "logits/chosen": -1.6197620630264282, "logits/rejected": -1.7358680963516235, "logps/chosen": -193.59860229492188, "logps/rejected": -176.07699584960938, "loss": 4577.9719, "rewards/accuracies": 0.59375, "rewards/chosen": -0.23332929611206055, "rewards/margins": 0.02557211183011532, "rewards/rejected": -0.2589014172554016, "rewards/safe_rewards": -0.23919209837913513, "rewards/unsafe_rewards": -0.23453950881958008, "step": 3160 }, { "epoch": 0.68, "learning_rate": 1.3873355812252693e-07, "logits/chosen": -1.4899275302886963, "logits/rejected": -1.6584640741348267, "logps/chosen": -207.3338623046875, "logps/rejected": -173.78103637695312, "loss": 4228.4383, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2083674669265747, "rewards/margins": 0.03246257081627846, "rewards/rejected": -0.24083003401756287, "rewards/safe_rewards": -0.21330063045024872, "rewards/unsafe_rewards": -0.1970447152853012, "step": 3170 }, { "epoch": 0.68, "learning_rate": 1.3705492868608148e-07, "logits/chosen": -1.5542356967926025, "logits/rejected": -1.7025644779205322, "logps/chosen": -199.91757202148438, "logps/rejected": -170.31875610351562, "loss": 4491.1234, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2047472894191742, "rewards/margins": 0.04883040860295296, "rewards/rejected": -0.25357770919799805, "rewards/safe_rewards": -0.2202194184064865, "rewards/unsafe_rewards": -0.22429391741752625, "step": 3180 }, { "epoch": 0.69, "learning_rate": 1.3538267302759484e-07, "logits/chosen": -1.5651812553405762, "logits/rejected": -1.728471040725708, "logps/chosen": -202.64808654785156, "logps/rejected": -174.353271484375, "loss": 5085.5727, "rewards/accuracies": 0.65625, "rewards/chosen": -0.20124392211437225, "rewards/margins": 0.03965099900960922, "rewards/rejected": -0.24089495837688446, "rewards/safe_rewards": -0.20891818404197693, "rewards/unsafe_rewards": -0.1991475224494934, "step": 3190 }, { "epoch": 0.69, "learning_rate": 1.3371688551671158e-07, "logits/chosen": -1.51738703250885, "logits/rejected": -1.7102487087249756, "logps/chosen": -207.679931640625, "logps/rejected": -174.0296173095703, "loss": 4710.3656, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19542929530143738, "rewards/margins": 0.06445382535457611, "rewards/rejected": -0.2598831057548523, "rewards/safe_rewards": -0.18978217244148254, "rewards/unsafe_rewards": -0.18269172310829163, "step": 3200 }, { "epoch": 0.69, "learning_rate": 1.3205766015806253e-07, "logits/chosen": -1.5219790935516357, "logits/rejected": -1.657189965248108, "logps/chosen": -210.4114990234375, "logps/rejected": -181.74635314941406, "loss": 4236.4859, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20510368049144745, "rewards/margins": 0.04666205495595932, "rewards/rejected": -0.25176572799682617, "rewards/safe_rewards": -0.22170217335224152, "rewards/unsafe_rewards": -0.2199019193649292, "step": 3210 }, { "epoch": 0.69, "learning_rate": 1.304050905859595e-07, "logits/chosen": -1.5404077768325806, "logits/rejected": -1.6161432266235352, "logps/chosen": -203.2340545654297, "logps/rejected": -183.6239776611328, "loss": 4843.9836, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21446290612220764, "rewards/margins": 0.02332746610045433, "rewards/rejected": -0.23779037594795227, "rewards/safe_rewards": -0.2106601893901825, "rewards/unsafe_rewards": -0.21920724213123322, "step": 3220 }, { "epoch": 0.7, "learning_rate": 1.2875927005911114e-07, "logits/chosen": -1.567230224609375, "logits/rejected": -1.6935707330703735, "logps/chosen": -197.04910278320312, "logps/rejected": -174.7772674560547, "loss": 4437.9621, "rewards/accuracies": 0.625, "rewards/chosen": -0.226846382021904, "rewards/margins": 0.03391646221280098, "rewards/rejected": -0.2607628405094147, "rewards/safe_rewards": -0.22100695967674255, "rewards/unsafe_rewards": -0.2279137670993805, "step": 3230 }, { "epoch": 0.7, "learning_rate": 1.271202914553605e-07, "logits/chosen": -1.513484239578247, "logits/rejected": -1.6583675146102905, "logps/chosen": -200.95921325683594, "logps/rejected": -172.23715209960938, "loss": 4957.8754, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.19900353252887726, "rewards/margins": 0.05364034324884415, "rewards/rejected": -0.252643883228302, "rewards/safe_rewards": -0.1909950077533722, "rewards/unsafe_rewards": -0.18218687176704407, "step": 3240 }, { "epoch": 0.7, "learning_rate": 1.2548824726644347e-07, "logits/chosen": -1.5379594564437866, "logits/rejected": -1.6838728189468384, "logps/chosen": -201.1802215576172, "logps/rejected": -171.7181854248047, "loss": 4513.3359, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.20569956302642822, "rewards/margins": 0.04141370207071304, "rewards/rejected": -0.24711327254772186, "rewards/safe_rewards": -0.22095346450805664, "rewards/unsafe_rewards": -0.19528479874134064, "step": 3250 }, { "epoch": 0.7, "learning_rate": 1.2386322959276907e-07, "logits/chosen": -1.5139939785003662, "logits/rejected": -1.6627384424209595, "logps/chosen": -212.0193328857422, "logps/rejected": -181.26559448242188, "loss": 4260.6391, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.19795635342597961, "rewards/margins": 0.053702227771282196, "rewards/rejected": -0.2516585886478424, "rewards/safe_rewards": -0.17839138209819794, "rewards/unsafe_rewards": -0.18358327448368073, "step": 3260 }, { "epoch": 0.7, "learning_rate": 1.2224533013822236e-07, "logits/chosen": -1.5436851978302002, "logits/rejected": -1.72197687625885, "logps/chosen": -196.26168823242188, "logps/rejected": -173.25753784179688, "loss": 4223.4074, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.21075502038002014, "rewards/margins": 0.04093529284000397, "rewards/rejected": -0.2516902983188629, "rewards/safe_rewards": -0.2117263525724411, "rewards/unsafe_rewards": -0.22454313933849335, "step": 3270 }, { "epoch": 0.71, "learning_rate": 1.2063464020498919e-07, "logits/chosen": -1.5643657445907593, "logits/rejected": -1.7283353805541992, "logps/chosen": -192.4326171875, "logps/rejected": -173.8364715576172, "loss": 4401.6281, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22436317801475525, "rewards/margins": 0.03776603564620018, "rewards/rejected": -0.2621292471885681, "rewards/safe_rewards": -0.24249792098999023, "rewards/unsafe_rewards": -0.23500247299671173, "step": 3280 }, { "epoch": 0.71, "learning_rate": 1.190312506884035e-07, "logits/chosen": -1.5402178764343262, "logits/rejected": -1.6961244344711304, "logps/chosen": -208.604248046875, "logps/rejected": -178.0830078125, "loss": 4290.5434, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.22383913397789001, "rewards/margins": 0.04307440668344498, "rewards/rejected": -0.2669135630130768, "rewards/safe_rewards": -0.21066319942474365, "rewards/unsafe_rewards": -0.23492050170898438, "step": 3290 }, { "epoch": 0.71, "learning_rate": 1.1743525207181851e-07, "logits/chosen": -1.575769305229187, "logits/rejected": -1.700962781906128, "logps/chosen": -195.44509887695312, "logps/rejected": -176.12158203125, "loss": 4471.0965, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21908660233020782, "rewards/margins": 0.025014039129018784, "rewards/rejected": -0.2441006451845169, "rewards/safe_rewards": -0.21556958556175232, "rewards/unsafe_rewards": -0.2034239023923874, "step": 3300 }, { "epoch": 0.71, "learning_rate": 1.1584673442149975e-07, "logits/chosen": -1.5367506742477417, "logits/rejected": -1.6448183059692383, "logps/chosen": -194.4497528076172, "logps/rejected": -177.68881225585938, "loss": 4580.968, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.22786736488342285, "rewards/margins": 0.01906476356089115, "rewards/rejected": -0.24693211913108826, "rewards/safe_rewards": -0.22850804030895233, "rewards/unsafe_rewards": -0.22795262932777405, "step": 3310 }, { "epoch": 0.71, "learning_rate": 1.1426578738154307e-07, "logits/chosen": -1.501944661140442, "logits/rejected": -1.6625356674194336, "logps/chosen": -205.0319061279297, "logps/rejected": -173.71832275390625, "loss": 4679.668, "rewards/accuracies": 0.625, "rewards/chosen": -0.21380797028541565, "rewards/margins": 0.033963773399591446, "rewards/rejected": -0.2477717399597168, "rewards/safe_rewards": -0.2320338487625122, "rewards/unsafe_rewards": -0.20674411952495575, "step": 3320 }, { "epoch": 0.72, "learning_rate": 1.1269250016881548e-07, "logits/chosen": -1.5857837200164795, "logits/rejected": -1.685595154762268, "logps/chosen": -193.54312133789062, "logps/rejected": -173.47622680664062, "loss": 4791.8289, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2283049374818802, "rewards/margins": 0.02232811599969864, "rewards/rejected": -0.2506330609321594, "rewards/safe_rewards": -0.2194657325744629, "rewards/unsafe_rewards": -0.2251621037721634, "step": 3330 }, { "epoch": 0.72, "learning_rate": 1.1112696156792018e-07, "logits/chosen": -1.5373399257659912, "logits/rejected": -1.7343944311141968, "logps/chosen": -207.3426055908203, "logps/rejected": -173.32110595703125, "loss": 4504.4281, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2145494520664215, "rewards/margins": 0.03693012148141861, "rewards/rejected": -0.2514795660972595, "rewards/safe_rewards": -0.22853374481201172, "rewards/unsafe_rewards": -0.20895126461982727, "step": 3340 }, { "epoch": 0.72, "learning_rate": 1.0956925992618677e-07, "logits/chosen": -1.5815013647079468, "logits/rejected": -1.7142406702041626, "logps/chosen": -195.98141479492188, "logps/rejected": -171.9582061767578, "loss": 4565.623, "rewards/accuracies": 0.65625, "rewards/chosen": -0.20546786487102509, "rewards/margins": 0.04988991841673851, "rewards/rejected": -0.2553578317165375, "rewards/safe_rewards": -0.19455237686634064, "rewards/unsafe_rewards": -0.19040384888648987, "step": 3350 }, { "epoch": 0.72, "learning_rate": 1.0801948314868503e-07, "logits/chosen": -1.5395123958587646, "logits/rejected": -1.6681487560272217, "logps/chosen": -199.94140625, "logps/rejected": -171.823486328125, "loss": 4509.9484, "rewards/accuracies": 0.75, "rewards/chosen": -0.20479190349578857, "rewards/margins": 0.056031227111816406, "rewards/rejected": -0.260823130607605, "rewards/safe_rewards": -0.20469561219215393, "rewards/unsafe_rewards": -0.20548808574676514, "step": 3360 }, { "epoch": 0.73, "learning_rate": 1.064777186932647e-07, "logits/chosen": -1.579958200454712, "logits/rejected": -1.704652190208435, "logps/chosen": -193.245361328125, "logps/rejected": -173.85861206054688, "loss": 4913.6008, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.21022441983222961, "rewards/margins": 0.035333260893821716, "rewards/rejected": -0.24555766582489014, "rewards/safe_rewards": -0.20150724053382874, "rewards/unsafe_rewards": -0.20611998438835144, "step": 3370 }, { "epoch": 0.73, "learning_rate": 1.0494405356561977e-07, "logits/chosen": -1.5486366748809814, "logits/rejected": -1.721789002418518, "logps/chosen": -206.40512084960938, "logps/rejected": -170.63851928710938, "loss": 4202.607, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22419854998588562, "rewards/margins": 0.029890745878219604, "rewards/rejected": -0.2540892958641052, "rewards/safe_rewards": -0.22339005768299103, "rewards/unsafe_rewards": -0.22142663598060608, "step": 3380 }, { "epoch": 0.73, "learning_rate": 1.0341857431437829e-07, "logits/chosen": -1.5325825214385986, "logits/rejected": -1.7025810480117798, "logps/chosen": -209.625732421875, "logps/rejected": -179.73287963867188, "loss": 4673.9648, "rewards/accuracies": 0.625, "rewards/chosen": -0.2193862497806549, "rewards/margins": 0.026952456682920456, "rewards/rejected": -0.24633869528770447, "rewards/safe_rewards": -0.2302241027355194, "rewards/unsafe_rewards": -0.21820607781410217, "step": 3390 }, { "epoch": 0.73, "learning_rate": 1.0190136702621877e-07, "logits/chosen": -1.5629844665527344, "logits/rejected": -1.7140281200408936, "logps/chosen": -203.20773315429688, "logps/rejected": -172.52706909179688, "loss": 4231.5273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21239981055259705, "rewards/margins": 0.036671411246061325, "rewards/rejected": -0.24907124042510986, "rewards/safe_rewards": -0.22368793189525604, "rewards/unsafe_rewards": -0.22601346671581268, "step": 3400 }, { "epoch": 0.73, "learning_rate": 1.0039251732101154e-07, "logits/chosen": -1.5504333972930908, "logits/rejected": -1.7646019458770752, "logps/chosen": -197.95440673828125, "logps/rejected": -167.21658325195312, "loss": 4349.7598, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.205386683344841, "rewards/margins": 0.05040183663368225, "rewards/rejected": -0.25578850507736206, "rewards/safe_rewards": -0.19493213295936584, "rewards/unsafe_rewards": -0.2087542563676834, "step": 3410 }, { "epoch": 0.74, "learning_rate": 9.889211034698747e-08, "logits/chosen": -1.5628987550735474, "logits/rejected": -1.7303714752197266, "logps/chosen": -196.9049072265625, "logps/rejected": -170.43849182128906, "loss": 4740.2758, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22145798802375793, "rewards/margins": 0.03408624976873398, "rewards/rejected": -0.2555442452430725, "rewards/safe_rewards": -0.21972405910491943, "rewards/unsafe_rewards": -0.23050184547901154, "step": 3420 }, { "epoch": 0.74, "learning_rate": 9.740023077593231e-08, "logits/chosen": -1.5358574390411377, "logits/rejected": -1.663415551185608, "logps/chosen": -202.64572143554688, "logps/rejected": -177.1818389892578, "loss": 4499.4555, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20824941992759705, "rewards/margins": 0.047806404531002045, "rewards/rejected": -0.2560558319091797, "rewards/safe_rewards": -0.20281240344047546, "rewards/unsafe_rewards": -0.1868453323841095, "step": 3430 }, { "epoch": 0.74, "learning_rate": 9.591696279840905e-08, "logits/chosen": -1.5135457515716553, "logits/rejected": -1.663956642150879, "logps/chosen": -206.18991088867188, "logps/rejected": -179.43446350097656, "loss": 4999.884, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.22454258799552917, "rewards/margins": 0.030628979206085205, "rewards/rejected": -0.2551715672016144, "rewards/safe_rewards": -0.22517065703868866, "rewards/unsafe_rewards": -0.22754926979541779, "step": 3440 }, { "epoch": 0.74, "learning_rate": 9.444239011900648e-08, "logits/chosen": -1.5009418725967407, "logits/rejected": -1.7027199268341064, "logps/chosen": -201.20140075683594, "logps/rejected": -173.31846618652344, "loss": 4538.8316, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21497246623039246, "rewards/margins": 0.03327861800789833, "rewards/rejected": -0.24825111031532288, "rewards/safe_rewards": -0.20281513035297394, "rewards/unsafe_rewards": -0.19864460825920105, "step": 3450 }, { "epoch": 0.74, "learning_rate": 9.297659595161534e-08, "logits/chosen": -1.5617711544036865, "logits/rejected": -1.6888530254364014, "logps/chosen": -195.1778564453125, "logps/rejected": -169.85659790039062, "loss": 4281.0555, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2031358778476715, "rewards/margins": 0.05218507722020149, "rewards/rejected": -0.2553209662437439, "rewards/safe_rewards": -0.21664252877235413, "rewards/unsafe_rewards": -0.20309622585773468, "step": 3460 }, { "epoch": 0.75, "learning_rate": 9.15196630147329e-08, "logits/chosen": -1.5777266025543213, "logits/rejected": -1.7069259881973267, "logps/chosen": -197.0939178466797, "logps/rejected": -174.30386352539062, "loss": 4451.8719, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24085012078285217, "rewards/margins": 0.02670753002166748, "rewards/rejected": -0.26755762100219727, "rewards/safe_rewards": -0.23957185447216034, "rewards/unsafe_rewards": -0.24107328057289124, "step": 3470 }, { "epoch": 0.75, "learning_rate": 9.007167352679432e-08, "logits/chosen": -1.5664374828338623, "logits/rejected": -1.718407392501831, "logps/chosen": -204.78367614746094, "logps/rejected": -173.39218139648438, "loss": 4349.3953, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2090870887041092, "rewards/margins": 0.05152062699198723, "rewards/rejected": -0.2606077194213867, "rewards/safe_rewards": -0.21288371086120605, "rewards/unsafe_rewards": -0.19556090235710144, "step": 3480 }, { "epoch": 0.75, "learning_rate": 8.863270920153342e-08, "logits/chosen": -1.5450637340545654, "logits/rejected": -1.7116619348526, "logps/chosen": -204.75596618652344, "logps/rejected": -173.54534912109375, "loss": 4506.9895, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21023401618003845, "rewards/margins": 0.05022934079170227, "rewards/rejected": -0.2604633569717407, "rewards/safe_rewards": -0.2224196195602417, "rewards/unsafe_rewards": -0.23853039741516113, "step": 3490 }, { "epoch": 0.75, "learning_rate": 8.720285124337107e-08, "logits/chosen": -1.5727078914642334, "logits/rejected": -1.739986777305603, "logps/chosen": -198.4178466796875, "logps/rejected": -170.70401000976562, "loss": 4613.8953, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.22135527431964874, "rewards/margins": 0.04793670028448105, "rewards/rejected": -0.2692919671535492, "rewards/safe_rewards": -0.21812447905540466, "rewards/unsafe_rewards": -0.22906669974327087, "step": 3500 }, { "epoch": 0.75, "eval_logits/chosen": -1.582217812538147, "eval_logits/rejected": -1.728867769241333, "eval_logps/chosen": -201.8304443359375, "eval_logps/rejected": -173.95654296875, "eval_loss": 4667.72998046875, "eval_rewards/accuracies": 0.6333494782447815, "eval_rewards/chosen": -0.22014380991458893, "eval_rewards/margins": 0.03595419600605965, "eval_rewards/rejected": -0.25609803199768066, "eval_rewards/safe_rewards": -0.21820080280303955, "eval_rewards/unsafe_rewards": -0.21815498173236847, "eval_runtime": 1192.5958, "eval_samples_per_second": 27.708, "eval_steps_per_second": 0.866, "step": 3500 }, { "epoch": 0.76, "learning_rate": 8.578218034283247e-08, "logits/chosen": -1.4781768321990967, "logits/rejected": -1.6592490673065186, "logps/chosen": -215.88845825195312, "logps/rejected": -181.23204040527344, "loss": 4519.3055, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21283051371574402, "rewards/margins": 0.04599450156092644, "rewards/rejected": -0.25882503390312195, "rewards/safe_rewards": -0.22382798790931702, "rewards/unsafe_rewards": -0.21961776912212372, "step": 3510 }, { "epoch": 0.76, "learning_rate": 8.437077667199402e-08, "logits/chosen": -1.5010335445404053, "logits/rejected": -1.6410468816757202, "logps/chosen": -204.42442321777344, "logps/rejected": -183.1348876953125, "loss": 4484.3266, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1902398020029068, "rewards/margins": 0.04791343957185745, "rewards/rejected": -0.23815324902534485, "rewards/safe_rewards": -0.19965849816799164, "rewards/unsafe_rewards": -0.18286621570587158, "step": 3520 }, { "epoch": 0.76, "learning_rate": 8.296871987995849e-08, "logits/chosen": -1.5453994274139404, "logits/rejected": -1.7193000316619873, "logps/chosen": -203.30819702148438, "logps/rejected": -177.2645263671875, "loss": 4623.2781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20352378487586975, "rewards/margins": 0.05504323169589043, "rewards/rejected": -0.25856703519821167, "rewards/safe_rewards": -0.19601953029632568, "rewards/unsafe_rewards": -0.19860202074050903, "step": 3530 }, { "epoch": 0.76, "learning_rate": 8.15760890883607e-08, "logits/chosen": -1.5572454929351807, "logits/rejected": -1.6609055995941162, "logps/chosen": -203.43728637695312, "logps/rejected": -177.62045288085938, "loss": 4518.6953, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22707335650920868, "rewards/margins": 0.03320090472698212, "rewards/rejected": -0.2602742612361908, "rewards/safe_rewards": -0.21548041701316833, "rewards/unsafe_rewards": -0.21296091377735138, "step": 3540 }, { "epoch": 0.76, "learning_rate": 8.019296288690225e-08, "logits/chosen": -1.523084044456482, "logits/rejected": -1.6730248928070068, "logps/chosen": -193.7915496826172, "logps/rejected": -170.6090545654297, "loss": 4808.1777, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.21210840344429016, "rewards/margins": 0.051982581615448, "rewards/rejected": -0.26409098505973816, "rewards/safe_rewards": -0.21478097140789032, "rewards/unsafe_rewards": -0.19741640985012054, "step": 3550 }, { "epoch": 0.77, "learning_rate": 7.881941932891628e-08, "logits/chosen": -1.5641376972198486, "logits/rejected": -1.6890513896942139, "logps/chosen": -194.4353485107422, "logps/rejected": -175.99887084960938, "loss": 4582.9, "rewards/accuracies": 0.625, "rewards/chosen": -0.2206435650587082, "rewards/margins": 0.03884998336434364, "rewards/rejected": -0.25949355959892273, "rewards/safe_rewards": -0.20557653903961182, "rewards/unsafe_rewards": -0.23320654034614563, "step": 3560 }, { "epoch": 0.77, "learning_rate": 7.745553592696333e-08, "logits/chosen": -1.531445860862732, "logits/rejected": -1.665540337562561, "logps/chosen": -208.8480682373047, "logps/rejected": -180.33901977539062, "loss": 4861.0609, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21229633688926697, "rewards/margins": 0.03237538784742355, "rewards/rejected": -0.24467173218727112, "rewards/safe_rewards": -0.23713278770446777, "rewards/unsafe_rewards": -0.2037380188703537, "step": 3570 }, { "epoch": 0.77, "learning_rate": 7.610138964845633e-08, "logits/chosen": -1.563547134399414, "logits/rejected": -1.6922988891601562, "logps/chosen": -199.64175415039062, "logps/rejected": -174.66647338867188, "loss": 4425.6211, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22444351017475128, "rewards/margins": 0.026137415319681168, "rewards/rejected": -0.25058090686798096, "rewards/safe_rewards": -0.22032615542411804, "rewards/unsafe_rewards": -0.21661004424095154, "step": 3580 }, { "epoch": 0.77, "learning_rate": 7.475705691131795e-08, "logits/chosen": -1.5212424993515015, "logits/rejected": -1.6964725255966187, "logps/chosen": -201.70118713378906, "logps/rejected": -170.40756225585938, "loss": 4419.0777, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20879754424095154, "rewards/margins": 0.053098440170288086, "rewards/rejected": -0.261896014213562, "rewards/safe_rewards": -0.2185305655002594, "rewards/unsafe_rewards": -0.20323090255260468, "step": 3590 }, { "epoch": 0.77, "learning_rate": 7.342261357966736e-08, "logits/chosen": -1.5294665098190308, "logits/rejected": -1.7083766460418701, "logps/chosen": -201.69686889648438, "logps/rejected": -176.46823120117188, "loss": 4720.5191, "rewards/accuracies": 0.625, "rewards/chosen": -0.21761341392993927, "rewards/margins": 0.036710239946842194, "rewards/rejected": -0.25432366132736206, "rewards/safe_rewards": -0.20865118503570557, "rewards/unsafe_rewards": -0.22028927505016327, "step": 3600 }, { "epoch": 0.78, "learning_rate": 7.209813495953962e-08, "logits/chosen": -1.5702482461929321, "logits/rejected": -1.7579265832901, "logps/chosen": -207.1014862060547, "logps/rejected": -176.28318786621094, "loss": 4590.3305, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.20807337760925293, "rewards/margins": 0.06202497333288193, "rewards/rejected": -0.27009838819503784, "rewards/safe_rewards": -0.20201008021831512, "rewards/unsafe_rewards": -0.21050193905830383, "step": 3610 }, { "epoch": 0.78, "learning_rate": 7.07836957946358e-08, "logits/chosen": -1.5720595121383667, "logits/rejected": -1.745388388633728, "logps/chosen": -198.0412139892578, "logps/rejected": -167.8216094970703, "loss": 4546.0297, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.23862460255622864, "rewards/margins": 0.036099765449762344, "rewards/rejected": -0.2747243344783783, "rewards/safe_rewards": -0.2615331709384918, "rewards/unsafe_rewards": -0.24627690017223358, "step": 3620 }, { "epoch": 0.78, "learning_rate": 6.947937026210469e-08, "logits/chosen": -1.5645544528961182, "logits/rejected": -1.7412183284759521, "logps/chosen": -209.1239013671875, "logps/rejected": -175.35061645507812, "loss": 4209.6773, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2278406322002411, "rewards/margins": 0.033286403864622116, "rewards/rejected": -0.2611270546913147, "rewards/safe_rewards": -0.23315083980560303, "rewards/unsafe_rewards": -0.22243352234363556, "step": 3630 }, { "epoch": 0.78, "learning_rate": 6.818523196835734e-08, "logits/chosen": -1.5257383584976196, "logits/rejected": -1.6933667659759521, "logps/chosen": -206.2113494873047, "logps/rejected": -173.90744018554688, "loss": 4540.3672, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21197137236595154, "rewards/margins": 0.04024039953947067, "rewards/rejected": -0.2522117495536804, "rewards/safe_rewards": -0.20645764470100403, "rewards/unsafe_rewards": -0.226496621966362, "step": 3640 }, { "epoch": 0.79, "learning_rate": 6.690135394491272e-08, "logits/chosen": -1.5711628198623657, "logits/rejected": -1.7121121883392334, "logps/chosen": -192.0582275390625, "logps/rejected": -169.54051208496094, "loss": 4504.5547, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2349282205104828, "rewards/margins": 0.023118287324905396, "rewards/rejected": -0.25804653763771057, "rewards/safe_rewards": -0.24355223774909973, "rewards/unsafe_rewards": -0.25344371795654297, "step": 3650 }, { "epoch": 0.79, "learning_rate": 6.562780864427681e-08, "logits/chosen": -1.545829176902771, "logits/rejected": -1.679800033569336, "logps/chosen": -199.39111328125, "logps/rejected": -175.27809143066406, "loss": 4719.975, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.22138893604278564, "rewards/margins": 0.030624713748693466, "rewards/rejected": -0.252013623714447, "rewards/safe_rewards": -0.2136097401380539, "rewards/unsafe_rewards": -0.20933452248573303, "step": 3660 }, { "epoch": 0.79, "learning_rate": 6.436466793585371e-08, "logits/chosen": -1.5915729999542236, "logits/rejected": -1.694249153137207, "logps/chosen": -206.84304809570312, "logps/rejected": -176.1241912841797, "loss": 4112.1449, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21597877144813538, "rewards/margins": 0.04085611552000046, "rewards/rejected": -0.25683489441871643, "rewards/safe_rewards": -0.23224084079265594, "rewards/unsafe_rewards": -0.21830904483795166, "step": 3670 }, { "epoch": 0.79, "learning_rate": 6.311200310188974e-08, "logits/chosen": -1.5542157888412476, "logits/rejected": -1.702500581741333, "logps/chosen": -198.125732421875, "logps/rejected": -173.75057983398438, "loss": 4309.7465, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.21638193726539612, "rewards/margins": 0.03397037461400032, "rewards/rejected": -0.25035232305526733, "rewards/safe_rewards": -0.23101818561553955, "rewards/unsafe_rewards": -0.22149118781089783, "step": 3680 }, { "epoch": 0.79, "learning_rate": 6.186988483345116e-08, "logits/chosen": -1.5018908977508545, "logits/rejected": -1.6514050960540771, "logps/chosen": -201.4400634765625, "logps/rejected": -175.40042114257812, "loss": 4692.3234, "rewards/accuracies": 0.6875, "rewards/chosen": -0.20784702897071838, "rewards/margins": 0.05187133699655533, "rewards/rejected": -0.2597183585166931, "rewards/safe_rewards": -0.2138625681400299, "rewards/unsafe_rewards": -0.21867568790912628, "step": 3690 }, { "epoch": 0.8, "learning_rate": 6.063838322643455e-08, "logits/chosen": -1.555877447128296, "logits/rejected": -1.6665763854980469, "logps/chosen": -202.6346435546875, "logps/rejected": -176.34506225585938, "loss": 4344.4145, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.2233920395374298, "rewards/margins": 0.03521738201379776, "rewards/rejected": -0.2586093842983246, "rewards/safe_rewards": -0.20836105942726135, "rewards/unsafe_rewards": -0.2061535120010376, "step": 3700 }, { "epoch": 0.8, "learning_rate": 5.9417567777611226e-08, "logits/chosen": -1.5634253025054932, "logits/rejected": -1.673022985458374, "logps/chosen": -207.9368133544922, "logps/rejected": -177.94314575195312, "loss": 4480.0738, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.21394410729408264, "rewards/margins": 0.039593279361724854, "rewards/rejected": -0.2535373866558075, "rewards/safe_rewards": -0.20515453815460205, "rewards/unsafe_rewards": -0.19917435944080353, "step": 3710 }, { "epoch": 0.8, "learning_rate": 5.820750738070551e-08, "logits/chosen": -1.5719643831253052, "logits/rejected": -1.6915416717529297, "logps/chosen": -206.11474609375, "logps/rejected": -176.34048461914062, "loss": 4529.9516, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21438832581043243, "rewards/margins": 0.051191218197345734, "rewards/rejected": -0.2655795216560364, "rewards/safe_rewards": -0.2100793570280075, "rewards/unsafe_rewards": -0.2091672420501709, "step": 3720 }, { "epoch": 0.8, "learning_rate": 5.700827032250671e-08, "logits/chosen": -1.526563286781311, "logits/rejected": -1.7206132411956787, "logps/chosen": -211.257568359375, "logps/rejected": -175.07769775390625, "loss": 4198.8828, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.20107205212116241, "rewards/margins": 0.04433692246675491, "rewards/rejected": -0.2454090118408203, "rewards/safe_rewards": -0.21468767523765564, "rewards/unsafe_rewards": -0.20043781399726868, "step": 3730 }, { "epoch": 0.8, "learning_rate": 5.5819924279015494e-08, "logits/chosen": -1.5858070850372314, "logits/rejected": -1.710129976272583, "logps/chosen": -196.7711181640625, "logps/rejected": -175.53395080566406, "loss": 4104.1645, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.2088882029056549, "rewards/margins": 0.030957471579313278, "rewards/rejected": -0.2398456633090973, "rewards/safe_rewards": -0.20901119709014893, "rewards/unsafe_rewards": -0.21478113532066345, "step": 3740 }, { "epoch": 0.81, "learning_rate": 5.464253631162491e-08, "logits/chosen": -1.5419082641601562, "logits/rejected": -1.6432714462280273, "logps/chosen": -202.7676239013672, "logps/rejected": -173.409423828125, "loss": 4565.3871, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22782376408576965, "rewards/margins": 0.028117746114730835, "rewards/rejected": -0.2559414803981781, "rewards/safe_rewards": -0.2293321192264557, "rewards/unsafe_rewards": -0.24931958317756653, "step": 3750 }, { "epoch": 0.81, "learning_rate": 5.347617286333597e-08, "logits/chosen": -1.535449743270874, "logits/rejected": -1.6785986423492432, "logps/chosen": -197.4762420654297, "logps/rejected": -173.0891571044922, "loss": 4565.5234, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22153358161449432, "rewards/margins": 0.050920527428388596, "rewards/rejected": -0.2724541425704956, "rewards/safe_rewards": -0.2115567922592163, "rewards/unsafe_rewards": -0.23430593311786652, "step": 3760 }, { "epoch": 0.81, "learning_rate": 5.232089975500773e-08, "logits/chosen": -1.5155491828918457, "logits/rejected": -1.6737333536148071, "logps/chosen": -206.5082244873047, "logps/rejected": -178.2578582763672, "loss": 4486.943, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2322498857975006, "rewards/margins": 0.03819186985492706, "rewards/rejected": -0.2704417407512665, "rewards/safe_rewards": -0.21915295720100403, "rewards/unsafe_rewards": -0.24670401215553284, "step": 3770 }, { "epoch": 0.81, "learning_rate": 5.117678218164337e-08, "logits/chosen": -1.5510855913162231, "logits/rejected": -1.6721782684326172, "logps/chosen": -199.70701599121094, "logps/rejected": -179.46823120117188, "loss": 4591.2012, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.22835128009319305, "rewards/margins": 0.03821973502635956, "rewards/rejected": -0.266571044921875, "rewards/safe_rewards": -0.2250767946243286, "rewards/unsafe_rewards": -0.23198679089546204, "step": 3780 }, { "epoch": 0.82, "learning_rate": 5.004388470871079e-08, "logits/chosen": -1.4897185564041138, "logits/rejected": -1.6097478866577148, "logps/chosen": -199.941650390625, "logps/rejected": -176.46286010742188, "loss": 4533.5742, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.22109904885292053, "rewards/margins": 0.036258306354284286, "rewards/rejected": -0.2573573589324951, "rewards/safe_rewards": -0.21302075684070587, "rewards/unsafe_rewards": -0.21134182810783386, "step": 3790 }, { "epoch": 0.82, "learning_rate": 4.8922271268498906e-08, "logits/chosen": -1.5674362182617188, "logits/rejected": -1.6754087209701538, "logps/chosen": -194.15757751464844, "logps/rejected": -174.41043090820312, "loss": 4446.6859, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22233310341835022, "rewards/margins": 0.03545377403497696, "rewards/rejected": -0.2577868700027466, "rewards/safe_rewards": -0.23787930607795715, "rewards/unsafe_rewards": -0.22785861790180206, "step": 3800 }, { "epoch": 0.82, "learning_rate": 4.781200515651015e-08, "logits/chosen": -1.5203776359558105, "logits/rejected": -1.708418607711792, "logps/chosen": -205.74319458007812, "logps/rejected": -171.13157653808594, "loss": 4508.541, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2302251160144806, "rewards/margins": 0.026879018172621727, "rewards/rejected": -0.25710412859916687, "rewards/safe_rewards": -0.22207477688789368, "rewards/unsafe_rewards": -0.2511066198348999, "step": 3810 }, { "epoch": 0.82, "learning_rate": 4.671314902788812e-08, "logits/chosen": -1.540942907333374, "logits/rejected": -1.6978009939193726, "logps/chosen": -206.22146606445312, "logps/rejected": -177.33303833007812, "loss": 4074.5801, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.228041410446167, "rewards/margins": 0.037344031035900116, "rewards/rejected": -0.2653854489326477, "rewards/safe_rewards": -0.2249521017074585, "rewards/unsafe_rewards": -0.23318883776664734, "step": 3820 }, { "epoch": 0.82, "learning_rate": 4.562576489388212e-08, "logits/chosen": -1.5757877826690674, "logits/rejected": -1.739296555519104, "logps/chosen": -200.76785278320312, "logps/rejected": -171.3051300048828, "loss": 4265.4422, "rewards/accuracies": 0.59375, "rewards/chosen": -0.21877846121788025, "rewards/margins": 0.036117374897003174, "rewards/rejected": -0.2548958361148834, "rewards/safe_rewards": -0.21657781302928925, "rewards/unsafe_rewards": -0.22746041417121887, "step": 3830 }, { "epoch": 0.83, "learning_rate": 4.454991411834766e-08, "logits/chosen": -1.5159938335418701, "logits/rejected": -1.7283504009246826, "logps/chosen": -204.42532348632812, "logps/rejected": -173.23770141601562, "loss": 4549.675, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21094194054603577, "rewards/margins": 0.04501693323254585, "rewards/rejected": -0.2559588551521301, "rewards/safe_rewards": -0.1877421885728836, "rewards/unsafe_rewards": -0.18227200210094452, "step": 3840 }, { "epoch": 0.83, "learning_rate": 4.348565741428323e-08, "logits/chosen": -1.5113378763198853, "logits/rejected": -1.657496452331543, "logps/chosen": -212.8829345703125, "logps/rejected": -182.03335571289062, "loss": 4059.1629, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.22388973832130432, "rewards/margins": 0.03489254042506218, "rewards/rejected": -0.2587822675704956, "rewards/safe_rewards": -0.23313137888908386, "rewards/unsafe_rewards": -0.21989551186561584, "step": 3850 }, { "epoch": 0.83, "learning_rate": 4.24330548404046e-08, "logits/chosen": -1.5456483364105225, "logits/rejected": -1.743522047996521, "logps/chosen": -205.0988006591797, "logps/rejected": -168.6521759033203, "loss": 4564.4871, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21663382649421692, "rewards/margins": 0.04531814157962799, "rewards/rejected": -0.2619519531726837, "rewards/safe_rewards": -0.2313702404499054, "rewards/unsafe_rewards": -0.23512372374534607, "step": 3860 }, { "epoch": 0.83, "learning_rate": 4.1392165797755066e-08, "logits/chosen": -1.5734905004501343, "logits/rejected": -1.6539456844329834, "logps/chosen": -200.99014282226562, "logps/rejected": -178.3603057861328, "loss": 4218.4957, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2177286595106125, "rewards/margins": 0.0380244180560112, "rewards/rejected": -0.2557530701160431, "rewards/safe_rewards": -0.22618231177330017, "rewards/unsafe_rewards": -0.240372896194458, "step": 3870 }, { "epoch": 0.83, "learning_rate": 4.036304902635374e-08, "logits/chosen": -1.56974196434021, "logits/rejected": -1.7335970401763916, "logps/chosen": -207.0657196044922, "logps/rejected": -179.28897094726562, "loss": 4337.1168, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.21363647282123566, "rewards/margins": 0.06000277400016785, "rewards/rejected": -0.2736392021179199, "rewards/safe_rewards": -0.20769217610359192, "rewards/unsafe_rewards": -0.20762905478477478, "step": 3880 }, { "epoch": 0.84, "learning_rate": 3.9345762601880414e-08, "logits/chosen": -1.5197885036468506, "logits/rejected": -1.6342586278915405, "logps/chosen": -209.10311889648438, "logps/rejected": -178.88075256347656, "loss": 4456.1254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20029421150684357, "rewards/margins": 0.043639883399009705, "rewards/rejected": -0.24393412470817566, "rewards/safe_rewards": -0.19951418042182922, "rewards/unsafe_rewards": -0.2195533812046051, "step": 3890 }, { "epoch": 0.84, "learning_rate": 3.83403639323982e-08, "logits/chosen": -1.54450261592865, "logits/rejected": -1.7031257152557373, "logps/chosen": -207.55300903320312, "logps/rejected": -174.103759765625, "loss": 4589.7766, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.220820814371109, "rewards/margins": 0.03926758095622063, "rewards/rejected": -0.26008838415145874, "rewards/safe_rewards": -0.23643799126148224, "rewards/unsafe_rewards": -0.2354736030101776, "step": 3900 }, { "epoch": 0.84, "learning_rate": 3.7346909755114094e-08, "logits/chosen": -1.5512750148773193, "logits/rejected": -1.716810941696167, "logps/chosen": -195.9817657470703, "logps/rejected": -171.260009765625, "loss": 4347.193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2327992171049118, "rewards/margins": 0.04027698189020157, "rewards/rejected": -0.2730761766433716, "rewards/safe_rewards": -0.21529337763786316, "rewards/unsafe_rewards": -0.21767893433570862, "step": 3910 }, { "epoch": 0.84, "learning_rate": 3.636545613317676e-08, "logits/chosen": -1.5541023015975952, "logits/rejected": -1.6840072870254517, "logps/chosen": -197.68307495117188, "logps/rejected": -174.50225830078125, "loss": 4547.9578, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21467241644859314, "rewards/margins": 0.04562082514166832, "rewards/rejected": -0.26029324531555176, "rewards/safe_rewards": -0.18482406437397003, "rewards/unsafe_rewards": -0.21347124874591827, "step": 3920 }, { "epoch": 0.85, "learning_rate": 3.539605845251323e-08, "logits/chosen": -1.5333888530731201, "logits/rejected": -1.6673816442489624, "logps/chosen": -201.00540161132812, "logps/rejected": -178.5613250732422, "loss": 4299.5359, "rewards/accuracies": 0.65625, "rewards/chosen": -0.22972938418388367, "rewards/margins": 0.035122793167829514, "rewards/rejected": -0.26485222578048706, "rewards/safe_rewards": -0.22891971468925476, "rewards/unsafe_rewards": -0.22931250929832458, "step": 3930 }, { "epoch": 0.85, "learning_rate": 3.44387714187028e-08, "logits/chosen": -1.54409658908844, "logits/rejected": -1.6842076778411865, "logps/chosen": -198.9086456298828, "logps/rejected": -172.55484008789062, "loss": 4685.8492, "rewards/accuracies": 0.65625, "rewards/chosen": -0.21229323744773865, "rewards/margins": 0.04917289689183235, "rewards/rejected": -0.2614661157131195, "rewards/safe_rewards": -0.21990077197551727, "rewards/unsafe_rewards": -0.2349376678466797, "step": 3940 }, { "epoch": 0.85, "learning_rate": 3.349364905389032e-08, "logits/chosen": -1.529813528060913, "logits/rejected": -1.655125617980957, "logps/chosen": -204.35086059570312, "logps/rejected": -172.13148498535156, "loss": 4502.7355, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21679949760437012, "rewards/margins": 0.04756585881114006, "rewards/rejected": -0.2643653452396393, "rewards/safe_rewards": -0.206808403134346, "rewards/unsafe_rewards": -0.2187560796737671, "step": 3950 }, { "epoch": 0.85, "learning_rate": 3.256074469373743e-08, "logits/chosen": -1.5653387308120728, "logits/rejected": -1.676509141921997, "logps/chosen": -201.04916381835938, "logps/rejected": -174.2598114013672, "loss": 4192.7984, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22048762440681458, "rewards/margins": 0.042679641395807266, "rewards/rejected": -0.26316726207733154, "rewards/safe_rewards": -0.21230462193489075, "rewards/unsafe_rewards": -0.22371487319469452, "step": 3960 }, { "epoch": 0.85, "learning_rate": 3.164011098441241e-08, "logits/chosen": -1.5919193029403687, "logits/rejected": -1.6907579898834229, "logps/chosen": -197.35784912109375, "logps/rejected": -176.74856567382812, "loss": 4614.6691, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2443159520626068, "rewards/margins": 0.02695249579846859, "rewards/rejected": -0.27126845717430115, "rewards/safe_rewards": -0.24112653732299805, "rewards/unsafe_rewards": -0.2611249089241028, "step": 3970 }, { "epoch": 0.86, "learning_rate": 3.073179987961974e-08, "logits/chosen": -1.4958531856536865, "logits/rejected": -1.642984390258789, "logps/chosen": -210.90951538085938, "logps/rejected": -178.82684326171875, "loss": 4404.5914, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21953506767749786, "rewards/margins": 0.06276973336935043, "rewards/rejected": -0.2823048233985901, "rewards/safe_rewards": -0.2320580780506134, "rewards/unsafe_rewards": -0.2204265147447586, "step": 3980 }, { "epoch": 0.86, "learning_rate": 2.983586263766777e-08, "logits/chosen": -1.5084037780761719, "logits/rejected": -1.6767162084579468, "logps/chosen": -204.7137451171875, "logps/rejected": -175.34596252441406, "loss": 4537.6961, "rewards/accuracies": 0.6875, "rewards/chosen": -0.20422199368476868, "rewards/margins": 0.052533071488142014, "rewards/rejected": -0.2567550539970398, "rewards/safe_rewards": -0.20250673592090607, "rewards/unsafe_rewards": -0.21399053931236267, "step": 3990 }, { "epoch": 0.86, "learning_rate": 2.895234981857633e-08, "logits/chosen": -1.569875955581665, "logits/rejected": -1.7342551946640015, "logps/chosen": -195.85757446289062, "logps/rejected": -167.7862091064453, "loss": 4642.2859, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.22548839449882507, "rewards/margins": 0.047246869653463364, "rewards/rejected": -0.2727352976799011, "rewards/safe_rewards": -0.21761241555213928, "rewards/unsafe_rewards": -0.20267339050769806, "step": 4000 }, { "epoch": 0.86, "eval_logits/chosen": -1.5832650661468506, "eval_logits/rejected": -1.7297797203063965, "eval_logps/chosen": -202.3949737548828, "eval_logps/rejected": -174.6187744140625, "eval_loss": 4661.87451171875, "eval_rewards/accuracies": 0.6335914731025696, "eval_rewards/chosen": -0.22578901052474976, "eval_rewards/margins": 0.036931220442056656, "eval_rewards/rejected": -0.2627202868461609, "eval_rewards/safe_rewards": -0.2238258719444275, "eval_rewards/unsafe_rewards": -0.22377504408359528, "eval_runtime": 1191.4835, "eval_samples_per_second": 27.733, "eval_steps_per_second": 0.867, "step": 4000 }, { "epoch": 0.86, "learning_rate": 2.8081311281223513e-08, "logits/chosen": -1.5415151119232178, "logits/rejected": -1.6680958271026611, "logps/chosen": -198.67210388183594, "logps/rejected": -176.63148498535156, "loss": 4739.6648, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21865084767341614, "rewards/margins": 0.0354168526828289, "rewards/rejected": -0.25406768918037415, "rewards/safe_rewards": -0.23806777596473694, "rewards/unsafe_rewards": -0.22812950611114502, "step": 4010 }, { "epoch": 0.87, "learning_rate": 2.7222796180531748e-08, "logits/chosen": -1.5459169149398804, "logits/rejected": -1.6988670825958252, "logps/chosen": -203.52243041992188, "logps/rejected": -181.1127471923828, "loss": 4620.7664, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1938166320323944, "rewards/margins": 0.06123456358909607, "rewards/rejected": -0.2550511956214905, "rewards/safe_rewards": -0.2138896882534027, "rewards/unsafe_rewards": -0.2022998332977295, "step": 4020 }, { "epoch": 0.87, "learning_rate": 2.637685296469422e-08, "logits/chosen": -1.5451260805130005, "logits/rejected": -1.6546366214752197, "logps/chosen": -199.86761474609375, "logps/rejected": -177.29090881347656, "loss": 4316.0938, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.23968538641929626, "rewards/margins": 0.02805022895336151, "rewards/rejected": -0.26773563027381897, "rewards/safe_rewards": -0.2209528386592865, "rewards/unsafe_rewards": -0.23488792777061462, "step": 4030 }, { "epoch": 0.87, "learning_rate": 2.5543529372440475e-08, "logits/chosen": -1.5570924282073975, "logits/rejected": -1.6944078207015991, "logps/chosen": -206.784423828125, "logps/rejected": -177.7298126220703, "loss": 4461.1797, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21581673622131348, "rewards/margins": 0.07267836481332779, "rewards/rejected": -0.28849509358406067, "rewards/safe_rewards": -0.21017804741859436, "rewards/unsafe_rewards": -0.23137512803077698, "step": 4040 }, { "epoch": 0.87, "learning_rate": 2.472287243034274e-08, "logits/chosen": -1.5541603565216064, "logits/rejected": -1.690901517868042, "logps/chosen": -193.99813842773438, "logps/rejected": -168.92327880859375, "loss": 4745.1195, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2187739908695221, "rewards/margins": 0.05106813833117485, "rewards/rejected": -0.26984211802482605, "rewards/safe_rewards": -0.20875611901283264, "rewards/unsafe_rewards": -0.22367683053016663, "step": 4050 }, { "epoch": 0.87, "learning_rate": 2.391492845016188e-08, "logits/chosen": -1.542761206626892, "logits/rejected": -1.7329285144805908, "logps/chosen": -200.78683471679688, "logps/rejected": -175.85972595214844, "loss": 4640.5484, "rewards/accuracies": 0.625, "rewards/chosen": -0.20942091941833496, "rewards/margins": 0.052261173725128174, "rewards/rejected": -0.26168206334114075, "rewards/safe_rewards": -0.21483008563518524, "rewards/unsafe_rewards": -0.21310248970985413, "step": 4060 }, { "epoch": 0.88, "learning_rate": 2.311974302623379e-08, "logits/chosen": -1.6087595224380493, "logits/rejected": -1.7294340133666992, "logps/chosen": -202.1204376220703, "logps/rejected": -180.51797485351562, "loss": 4259.0211, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2266724556684494, "rewards/margins": 0.033739060163497925, "rewards/rejected": -0.26041150093078613, "rewards/safe_rewards": -0.22564931213855743, "rewards/unsafe_rewards": -0.21629604697227478, "step": 4070 }, { "epoch": 0.88, "learning_rate": 2.2337361032896767e-08, "logits/chosen": -1.5471532344818115, "logits/rejected": -1.6578731536865234, "logps/chosen": -203.94021606445312, "logps/rejected": -177.37997436523438, "loss": 4412.5059, "rewards/accuracies": 0.625, "rewards/chosen": -0.22492928802967072, "rewards/margins": 0.0362665168941021, "rewards/rejected": -0.2611957788467407, "rewards/safe_rewards": -0.21642470359802246, "rewards/unsafe_rewards": -0.2320546656847, "step": 4080 }, { "epoch": 0.88, "learning_rate": 2.1567826621958746e-08, "logits/chosen": -1.5730527639389038, "logits/rejected": -1.6890232563018799, "logps/chosen": -203.07565307617188, "logps/rejected": -177.7918701171875, "loss": 4680.6594, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23096367716789246, "rewards/margins": 0.036701567471027374, "rewards/rejected": -0.2676652669906616, "rewards/safe_rewards": -0.2325846254825592, "rewards/unsafe_rewards": -0.2197273075580597, "step": 4090 }, { "epoch": 0.88, "learning_rate": 2.081118322020603e-08, "logits/chosen": -1.5450632572174072, "logits/rejected": -1.6927597522735596, "logps/chosen": -197.76345825195312, "logps/rejected": -175.14407348632812, "loss": 4605.4648, "rewards/accuracies": 0.65625, "rewards/chosen": -0.22360114753246307, "rewards/margins": 0.043504662811756134, "rewards/rejected": -0.2671058773994446, "rewards/safe_rewards": -0.22719350457191467, "rewards/unsafe_rewards": -0.2366548329591751, "step": 4100 }, { "epoch": 0.88, "learning_rate": 2.0067473526952288e-08, "logits/chosen": -1.5121644735336304, "logits/rejected": -1.672772765159607, "logps/chosen": -205.19656372070312, "logps/rejected": -177.78427124023438, "loss": 4727.5754, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.20926113426685333, "rewards/margins": 0.04061561077833176, "rewards/rejected": -0.2498767375946045, "rewards/safe_rewards": -0.21143916249275208, "rewards/unsafe_rewards": -0.2234780341386795, "step": 4110 }, { "epoch": 0.89, "learning_rate": 1.933673951162923e-08, "logits/chosen": -1.5162945985794067, "logits/rejected": -1.6321182250976562, "logps/chosen": -207.9658203125, "logps/rejected": -179.07313537597656, "loss": 4461.6125, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.20448677241802216, "rewards/margins": 0.035980213433504105, "rewards/rejected": -0.24046699702739716, "rewards/safe_rewards": -0.20977330207824707, "rewards/unsafe_rewards": -0.20152127742767334, "step": 4120 }, { "epoch": 0.89, "learning_rate": 1.8619022411417968e-08, "logits/chosen": -1.5268123149871826, "logits/rejected": -1.7471297979354858, "logps/chosen": -199.77569580078125, "logps/rejected": -167.7947235107422, "loss": 4440.393, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.2128031700849533, "rewards/margins": 0.05113289877772331, "rewards/rejected": -0.26393604278564453, "rewards/safe_rewards": -0.20926113426685333, "rewards/unsafe_rewards": -0.21085695922374725, "step": 4130 }, { "epoch": 0.89, "learning_rate": 1.7914362728921856e-08, "logits/chosen": -1.5482009649276733, "logits/rejected": -1.7027896642684937, "logps/chosen": -198.7368927001953, "logps/rejected": -174.98550415039062, "loss": 4415.4906, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.23056156933307648, "rewards/margins": 0.041701577603816986, "rewards/rejected": -0.2722631096839905, "rewards/safe_rewards": -0.23716148734092712, "rewards/unsafe_rewards": -0.23339839279651642, "step": 4140 }, { "epoch": 0.89, "learning_rate": 1.7222800229881123e-08, "logits/chosen": -1.519262433052063, "logits/rejected": -1.6434913873672485, "logps/chosen": -205.01522827148438, "logps/rejected": -177.63076782226562, "loss": 4461.1133, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.20467379689216614, "rewards/margins": 0.044587306678295135, "rewards/rejected": -0.24926109611988068, "rewards/safe_rewards": -0.17283745110034943, "rewards/unsafe_rewards": -0.19343654811382294, "step": 4150 }, { "epoch": 0.9, "learning_rate": 1.6544373940928365e-08, "logits/chosen": -1.539603590965271, "logits/rejected": -1.7172355651855469, "logps/chosen": -203.49978637695312, "logps/rejected": -175.1534881591797, "loss": 4214.7734, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2324318140745163, "rewards/margins": 0.03762615844607353, "rewards/rejected": -0.2700579762458801, "rewards/safe_rewards": -0.24496932327747345, "rewards/unsafe_rewards": -0.2495719939470291, "step": 4160 }, { "epoch": 0.9, "learning_rate": 1.587912214738654e-08, "logits/chosen": -1.544495701789856, "logits/rejected": -1.694962501525879, "logps/chosen": -216.84716796875, "logps/rejected": -179.02352905273438, "loss": 4363.5437, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21589577198028564, "rewards/margins": 0.04321189969778061, "rewards/rejected": -0.25910764932632446, "rewards/safe_rewards": -0.2176029235124588, "rewards/unsafe_rewards": -0.20830246806144714, "step": 4170 }, { "epoch": 0.9, "learning_rate": 1.522708239110826e-08, "logits/chosen": -1.546251893043518, "logits/rejected": -1.7255980968475342, "logps/chosen": -200.89883422851562, "logps/rejected": -173.24826049804688, "loss": 4448.4629, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.22694475948810577, "rewards/margins": 0.049153976142406464, "rewards/rejected": -0.27609875798225403, "rewards/safe_rewards": -0.2252611219882965, "rewards/unsafe_rewards": -0.2131088674068451, "step": 4180 }, { "epoch": 0.9, "learning_rate": 1.4588291468357216e-08, "logits/chosen": -1.533685326576233, "logits/rejected": -1.6862236261367798, "logps/chosen": -202.2422332763672, "logps/rejected": -171.79446411132812, "loss": 4573.1805, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.2144947499036789, "rewards/margins": 0.051170624792575836, "rewards/rejected": -0.26566535234451294, "rewards/safe_rewards": -0.2203047275543213, "rewards/unsafe_rewards": -0.2292819321155548, "step": 4190 }, { "epoch": 0.9, "learning_rate": 1.396278542773166e-08, "logits/chosen": -1.5443111658096313, "logits/rejected": -1.6806228160858154, "logps/chosen": -193.24313354492188, "logps/rejected": -172.92367553710938, "loss": 4648.4578, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.22670194506645203, "rewards/margins": 0.04261786863207817, "rewards/rejected": -0.2693198323249817, "rewards/safe_rewards": -0.22814902663230896, "rewards/unsafe_rewards": -0.22405500710010529, "step": 4200 }, { "epoch": 0.91, "learning_rate": 1.3350599568130172e-08, "logits/chosen": -1.5299437046051025, "logits/rejected": -1.6678447723388672, "logps/chosen": -209.5962371826172, "logps/rejected": -179.04458618164062, "loss": 4389.0832, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.22599825263023376, "rewards/margins": 0.03723657876253128, "rewards/rejected": -0.26323485374450684, "rewards/safe_rewards": -0.22955498099327087, "rewards/unsafe_rewards": -0.2490617334842682, "step": 4210 }, { "epoch": 0.91, "learning_rate": 1.2751768436759597e-08, "logits/chosen": -1.573338270187378, "logits/rejected": -1.7038114070892334, "logps/chosen": -192.61300659179688, "logps/rejected": -167.113037109375, "loss": 4450.8813, "rewards/accuracies": 0.625, "rewards/chosen": -0.23437383770942688, "rewards/margins": 0.032670460641384125, "rewards/rejected": -0.267044335603714, "rewards/safe_rewards": -0.2344360053539276, "rewards/unsafe_rewards": -0.23572202026844025, "step": 4220 }, { "epoch": 0.91, "learning_rate": 1.2166325827185381e-08, "logits/chosen": -1.540734052658081, "logits/rejected": -1.6662623882293701, "logps/chosen": -196.95108032226562, "logps/rejected": -175.09524536132812, "loss": 4586.8227, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.2177889347076416, "rewards/margins": 0.04984976723790169, "rewards/rejected": -0.2676386833190918, "rewards/safe_rewards": -0.2173369824886322, "rewards/unsafe_rewards": -0.2277562916278839, "step": 4230 }, { "epoch": 0.91, "learning_rate": 1.159430477742468e-08, "logits/chosen": -1.5400536060333252, "logits/rejected": -1.660771369934082, "logps/chosen": -208.582763671875, "logps/rejected": -182.1627655029297, "loss": 4461.1719, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20761707425117493, "rewards/margins": 0.04694581404328346, "rewards/rejected": -0.2545629143714905, "rewards/safe_rewards": -0.18549469113349915, "rewards/unsafe_rewards": -0.19585414230823517, "step": 4240 }, { "epoch": 0.91, "learning_rate": 1.1035737568081904e-08, "logits/chosen": -1.5536563396453857, "logits/rejected": -1.703874945640564, "logps/chosen": -203.28794860839844, "logps/rejected": -170.15109252929688, "loss": 4204.727, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.22561001777648926, "rewards/margins": 0.032753050327301025, "rewards/rejected": -0.2583630681037903, "rewards/safe_rewards": -0.2299257218837738, "rewards/unsafe_rewards": -0.22964270412921906, "step": 4250 }, { "epoch": 0.92, "learning_rate": 1.0490655720526792e-08, "logits/chosen": -1.5516449213027954, "logits/rejected": -1.7312133312225342, "logps/chosen": -201.54092407226562, "logps/rejected": -168.93801879882812, "loss": 4682.7906, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2407626211643219, "rewards/margins": 0.031089726835489273, "rewards/rejected": -0.2718523442745209, "rewards/safe_rewards": -0.22686032950878143, "rewards/unsafe_rewards": -0.2263687551021576, "step": 4260 }, { "epoch": 0.92, "learning_rate": 9.959089995115994e-09, "logits/chosen": -1.5342925786972046, "logits/rejected": -1.719836950302124, "logps/chosen": -207.8533477783203, "logps/rejected": -173.6595458984375, "loss": 4546.6949, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.21739885210990906, "rewards/margins": 0.041012898087501526, "rewards/rejected": -0.2584117650985718, "rewards/safe_rewards": -0.2219989001750946, "rewards/unsafe_rewards": -0.20379142463207245, "step": 4270 }, { "epoch": 0.92, "learning_rate": 9.44107038945688e-09, "logits/chosen": -1.5936216115951538, "logits/rejected": -1.7446895837783813, "logps/chosen": -205.1503448486328, "logps/rejected": -170.57162475585938, "loss": 4357.159, "rewards/accuracies": 0.625, "rewards/chosen": -0.23411054909229279, "rewards/margins": 0.028366338461637497, "rewards/rejected": -0.2624768912792206, "rewards/safe_rewards": -0.256578266620636, "rewards/unsafe_rewards": -0.2524385452270508, "step": 4280 }, { "epoch": 0.92, "learning_rate": 8.936626136714753e-09, "logits/chosen": -1.5413211584091187, "logits/rejected": -1.702247977256775, "logps/chosen": -202.26254272460938, "logps/rejected": -174.92318725585938, "loss": 4607.9031, "rewards/accuracies": 0.625, "rewards/chosen": -0.22359450161457062, "rewards/margins": 0.03811618685722351, "rewards/rejected": -0.26171064376831055, "rewards/safe_rewards": -0.2341107577085495, "rewards/unsafe_rewards": -0.23293955624103546, "step": 4290 }, { "epoch": 0.93, "learning_rate": 8.44578570396326e-09, "logits/chosen": -1.5220444202423096, "logits/rejected": -1.6292047500610352, "logps/chosen": -206.94992065429688, "logps/rejected": -186.24903869628906, "loss": 4408.2602, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.22363758087158203, "rewards/margins": 0.03966961428523064, "rewards/rejected": -0.26330721378326416, "rewards/safe_rewards": -0.2009374350309372, "rewards/unsafe_rewards": -0.20961937308311462, "step": 4300 }, { "epoch": 0.93, "learning_rate": 7.968576790577802e-09, "logits/chosen": -1.547127366065979, "logits/rejected": -1.7035160064697266, "logps/chosen": -206.48672485351562, "logps/rejected": -181.2369842529297, "loss": 4330.9066, "rewards/accuracies": 0.65625, "rewards/chosen": -0.23172064125537872, "rewards/margins": 0.029529940336942673, "rewards/rejected": -0.2612505555152893, "rewards/safe_rewards": -0.21161434054374695, "rewards/unsafe_rewards": -0.2182123214006424, "step": 4310 }, { "epoch": 0.93, "learning_rate": 7.505026326672492e-09, "logits/chosen": -1.5171513557434082, "logits/rejected": -1.6687825918197632, "logps/chosen": -202.39492797851562, "logps/rejected": -178.00820922851562, "loss": 4666.9668, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21068453788757324, "rewards/margins": 0.041193027049303055, "rewards/rejected": -0.2518775761127472, "rewards/safe_rewards": -0.2034362107515335, "rewards/unsafe_rewards": -0.20642109215259552, "step": 4320 }, { "epoch": 0.93, "learning_rate": 7.055160471580224e-09, "logits/chosen": -1.529333233833313, "logits/rejected": -1.6757583618164062, "logps/chosen": -206.0924835205078, "logps/rejected": -175.94998168945312, "loss": 4356.7973, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.20352426171302795, "rewards/margins": 0.05208639055490494, "rewards/rejected": -0.2556106448173523, "rewards/safe_rewards": -0.19262734055519104, "rewards/unsafe_rewards": -0.2056398093700409, "step": 4330 }, { "epoch": 0.93, "learning_rate": 6.619004612376717e-09, "logits/chosen": -1.5720365047454834, "logits/rejected": -1.6947011947631836, "logps/chosen": -194.35650634765625, "logps/rejected": -169.61978149414062, "loss": 4436.5492, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21772924065589905, "rewards/margins": 0.051900286227464676, "rewards/rejected": -0.2696295380592346, "rewards/safe_rewards": -0.20219890773296356, "rewards/unsafe_rewards": -0.21767482161521912, "step": 4340 }, { "epoch": 0.94, "learning_rate": 6.196583362447688e-09, "logits/chosen": -1.5513790845870972, "logits/rejected": -1.664562463760376, "logps/chosen": -206.83602905273438, "logps/rejected": -178.6460723876953, "loss": 4680.382, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.2238740473985672, "rewards/margins": 0.03266584128141403, "rewards/rejected": -0.256539911031723, "rewards/safe_rewards": -0.21621663868427277, "rewards/unsafe_rewards": -0.23178717494010925, "step": 4350 }, { "epoch": 0.94, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -1.5511419773101807, "logits/rejected": -1.7282922267913818, "logps/chosen": -201.13812255859375, "logps/rejected": -168.7653350830078, "loss": 4438.5867, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.23417536914348602, "rewards/margins": 0.03614838421344757, "rewards/rejected": -0.2703237533569336, "rewards/safe_rewards": -0.25049442052841187, "rewards/unsafe_rewards": -0.2440376579761505, "step": 4360 }, { "epoch": 0.94, "learning_rate": 5.393039267215627e-09, "logits/chosen": -1.526679277420044, "logits/rejected": -1.6401304006576538, "logps/chosen": -196.14584350585938, "logps/rejected": -172.19378662109375, "loss": 4748.268, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2357463389635086, "rewards/margins": 0.04006447643041611, "rewards/rejected": -0.2758108377456665, "rewards/safe_rewards": -0.22228869795799255, "rewards/unsafe_rewards": -0.2542802095413208, "step": 4370 }, { "epoch": 0.94, "learning_rate": 5.01196176795185e-09, "logits/chosen": -1.5239098072052002, "logits/rejected": -1.6949926614761353, "logps/chosen": -206.1194305419922, "logps/rejected": -173.067626953125, "loss": 4489.8125, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.22173237800598145, "rewards/margins": 0.05162891745567322, "rewards/rejected": -0.2733612656593323, "rewards/safe_rewards": -0.22374515235424042, "rewards/unsafe_rewards": -0.21424159407615662, "step": 4380 }, { "epoch": 0.94, "learning_rate": 4.644709567482169e-09, "logits/chosen": -1.5908830165863037, "logits/rejected": -1.7216293811798096, "logps/chosen": -202.15023803710938, "logps/rejected": -172.44723510742188, "loss": 4484.1391, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22054632008075714, "rewards/margins": 0.05974392220377922, "rewards/rejected": -0.28029024600982666, "rewards/safe_rewards": -0.20276649296283722, "rewards/unsafe_rewards": -0.22382768988609314, "step": 4390 }, { "epoch": 0.95, "learning_rate": 4.291303390783346e-09, "logits/chosen": -1.5407321453094482, "logits/rejected": -1.638816475868225, "logps/chosen": -204.30056762695312, "logps/rejected": -183.88717651367188, "loss": 4378.6922, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.23403939604759216, "rewards/margins": 0.02884838357567787, "rewards/rejected": -0.26288777589797974, "rewards/safe_rewards": -0.2432643622159958, "rewards/unsafe_rewards": -0.21748991310596466, "step": 4400 }, { "epoch": 0.95, "learning_rate": 3.951763181465756e-09, "logits/chosen": -1.547709345817566, "logits/rejected": -1.684533715248108, "logps/chosen": -192.23397827148438, "logps/rejected": -167.48971557617188, "loss": 4285.2215, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2259986400604248, "rewards/margins": 0.049560967832803726, "rewards/rejected": -0.27555960416793823, "rewards/safe_rewards": -0.2212650030851364, "rewards/unsafe_rewards": -0.21501357853412628, "step": 4410 }, { "epoch": 0.95, "learning_rate": 3.6261081006480065e-09, "logits/chosen": -1.6037817001342773, "logits/rejected": -1.7418960332870483, "logps/chosen": -195.6244659423828, "logps/rejected": -168.18447875976562, "loss": 4044.2965, "rewards/accuracies": 0.65625, "rewards/chosen": -0.23674693703651428, "rewards/margins": 0.045795343816280365, "rewards/rejected": -0.28254228830337524, "rewards/safe_rewards": -0.2371511459350586, "rewards/unsafe_rewards": -0.25264227390289307, "step": 4420 }, { "epoch": 0.95, "learning_rate": 3.3143565258754448e-09, "logits/chosen": -1.545133113861084, "logits/rejected": -1.6558042764663696, "logps/chosen": -195.68601989746094, "logps/rejected": -171.66075134277344, "loss": 4571.423, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22517141699790955, "rewards/margins": 0.03568132966756821, "rewards/rejected": -0.26085275411605835, "rewards/safe_rewards": -0.21333293616771698, "rewards/unsafe_rewards": -0.232491135597229, "step": 4430 }, { "epoch": 0.96, "learning_rate": 3.0165260500832625e-09, "logits/chosen": -1.5443904399871826, "logits/rejected": -1.6798639297485352, "logps/chosen": -202.24758911132812, "logps/rejected": -179.0845489501953, "loss": 4574.2883, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2252836674451828, "rewards/margins": 0.0314502976834774, "rewards/rejected": -0.2567339539527893, "rewards/safe_rewards": -0.22800619900226593, "rewards/unsafe_rewards": -0.2374502718448639, "step": 4440 }, { "epoch": 0.96, "learning_rate": 2.732633480603569e-09, "logits/chosen": -1.5942058563232422, "logits/rejected": -1.7432552576065063, "logps/chosen": -204.35781860351562, "logps/rejected": -170.653564453125, "loss": 4557.0211, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21791794896125793, "rewards/margins": 0.04766512289643288, "rewards/rejected": -0.2655830681324005, "rewards/safe_rewards": -0.21400025486946106, "rewards/unsafe_rewards": -0.22362348437309265, "step": 4450 }, { "epoch": 0.96, "learning_rate": 2.4626948382168723e-09, "logits/chosen": -1.5271507501602173, "logits/rejected": -1.6874616146087646, "logps/chosen": -203.99830627441406, "logps/rejected": -178.9078369140625, "loss": 4178.2516, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.22850441932678223, "rewards/margins": 0.0430399626493454, "rewards/rejected": -0.27154436707496643, "rewards/safe_rewards": -0.23654106259346008, "rewards/unsafe_rewards": -0.2442520409822464, "step": 4460 }, { "epoch": 0.96, "learning_rate": 2.206725356248135e-09, "logits/chosen": -1.5417709350585938, "logits/rejected": -1.6948795318603516, "logps/chosen": -198.9254913330078, "logps/rejected": -173.546142578125, "loss": 4277.4762, "rewards/accuracies": 0.59375, "rewards/chosen": -0.21901535987854004, "rewards/margins": 0.03889850527048111, "rewards/rejected": -0.25791388750076294, "rewards/safe_rewards": -0.2173583060503006, "rewards/unsafe_rewards": -0.22352378070354462, "step": 4470 }, { "epoch": 0.96, "learning_rate": 1.9647394797069615e-09, "logits/chosen": -1.5543997287750244, "logits/rejected": -1.6949079036712646, "logps/chosen": -200.18955993652344, "logps/rejected": -166.99183654785156, "loss": 4580.9516, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.225600928068161, "rewards/margins": 0.049252092838287354, "rewards/rejected": -0.27485305070877075, "rewards/safe_rewards": -0.2416580617427826, "rewards/unsafe_rewards": -0.24350671470165253, "step": 4480 }, { "epoch": 0.97, "learning_rate": 1.7367508644725026e-09, "logits/chosen": -1.5280859470367432, "logits/rejected": -1.6739555597305298, "logps/chosen": -219.5941619873047, "logps/rejected": -183.71591186523438, "loss": 4532.4812, "rewards/accuracies": 0.65625, "rewards/chosen": -0.20959798991680145, "rewards/margins": 0.040694497525691986, "rewards/rejected": -0.25029247999191284, "rewards/safe_rewards": -0.23201878368854523, "rewards/unsafe_rewards": -0.21386778354644775, "step": 4490 }, { "epoch": 0.97, "learning_rate": 1.5227723765228462e-09, "logits/chosen": -1.5422213077545166, "logits/rejected": -1.6634289026260376, "logps/chosen": -197.53561401367188, "logps/rejected": -170.76748657226562, "loss": 4747.2375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2148609161376953, "rewards/margins": 0.04285234957933426, "rewards/rejected": -0.257713258266449, "rewards/safe_rewards": -0.20181219279766083, "rewards/unsafe_rewards": -0.20071211457252502, "step": 4500 }, { "epoch": 0.97, "eval_logits/chosen": -1.5837669372558594, "eval_logits/rejected": -1.7302347421646118, "eval_logps/chosen": -202.47447204589844, "eval_logps/rejected": -174.72433471679688, "eval_loss": 4659.36865234375, "eval_rewards/accuracies": 0.6362536549568176, "eval_rewards/chosen": -0.22658397257328033, "eval_rewards/margins": 0.03719181567430496, "eval_rewards/rejected": -0.2637757956981659, "eval_rewards/safe_rewards": -0.2245582491159439, "eval_rewards/unsafe_rewards": -0.22446152567863464, "eval_runtime": 1212.34, "eval_samples_per_second": 27.256, "eval_steps_per_second": 0.852, "step": 4500 }, { "epoch": 0.97, "learning_rate": 1.322816091208906e-09, "logits/chosen": -1.5347139835357666, "logits/rejected": -1.6895307302474976, "logps/chosen": -205.2490997314453, "logps/rejected": -180.6209716796875, "loss": 4309.8016, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.22458508610725403, "rewards/margins": 0.05188177898526192, "rewards/rejected": -0.27646684646606445, "rewards/safe_rewards": -0.23022957146167755, "rewards/unsafe_rewards": -0.248606875538826, "step": 4510 }, { "epoch": 0.97, "learning_rate": 1.1368932925730212e-09, "logits/chosen": -1.5477070808410645, "logits/rejected": -1.7129848003387451, "logps/chosen": -201.37086486816406, "logps/rejected": -173.3863525390625, "loss": 5005.2855, "rewards/accuracies": 0.6875, "rewards/chosen": -0.21432733535766602, "rewards/margins": 0.05523134022951126, "rewards/rejected": -0.2695586681365967, "rewards/safe_rewards": -0.20947813987731934, "rewards/unsafe_rewards": -0.2180863916873932, "step": 4520 }, { "epoch": 0.97, "learning_rate": 9.650144727120502e-10, "logits/chosen": -1.4965711832046509, "logits/rejected": -1.7335584163665771, "logps/chosen": -223.14920043945312, "logps/rejected": -178.56631469726562, "loss": 4081.943, "rewards/accuracies": 0.71875, "rewards/chosen": -0.20390522480010986, "rewards/margins": 0.05761183053255081, "rewards/rejected": -0.2615170478820801, "rewards/safe_rewards": -0.18720386922359467, "rewards/unsafe_rewards": -0.1937686949968338, "step": 4530 }, { "epoch": 0.98, "learning_rate": 8.071893311855094e-10, "logits/chosen": -1.499099612236023, "logits/rejected": -1.625657320022583, "logps/chosen": -205.8462677001953, "logps/rejected": -182.3321533203125, "loss": 4424.7332, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.21618399024009705, "rewards/margins": 0.03759055584669113, "rewards/rejected": -0.25377458333969116, "rewards/safe_rewards": -0.2258019894361496, "rewards/unsafe_rewards": -0.22823485732078552, "step": 4540 }, { "epoch": 0.98, "learning_rate": 6.634267744679845e-10, "logits/chosen": -1.5378913879394531, "logits/rejected": -1.6953462362289429, "logps/chosen": -209.7748565673828, "logps/rejected": -178.35562133789062, "loss": 4600.5941, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2141757309436798, "rewards/margins": 0.05765043944120407, "rewards/rejected": -0.2718261480331421, "rewards/safe_rewards": -0.21044981479644775, "rewards/unsafe_rewards": -0.22114381194114685, "step": 4550 }, { "epoch": 0.98, "learning_rate": 5.337349154465598e-10, "logits/chosen": -1.5890424251556396, "logits/rejected": -1.7180182933807373, "logps/chosen": -196.7635498046875, "logps/rejected": -174.26890563964844, "loss": 4522.6297, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.23434624075889587, "rewards/margins": 0.02388044074177742, "rewards/rejected": -0.2582266926765442, "rewards/safe_rewards": -0.2597582936286926, "rewards/unsafe_rewards": -0.22795064747333527, "step": 4560 }, { "epoch": 0.98, "learning_rate": 4.1812107296312904e-10, "logits/chosen": -1.5449397563934326, "logits/rejected": -1.6918556690216064, "logps/chosen": -202.8343963623047, "logps/rejected": -180.87103271484375, "loss": 4403.4859, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.21885719895362854, "rewards/margins": 0.03112492524087429, "rewards/rejected": -0.249982088804245, "rewards/safe_rewards": -0.22675243020057678, "rewards/unsafe_rewards": -0.23570093512535095, "step": 4570 }, { "epoch": 0.99, "learning_rate": 3.1659177140114233e-10, "logits/chosen": -1.553341269493103, "logits/rejected": -1.7105824947357178, "logps/chosen": -201.67880249023438, "logps/rejected": -171.5751953125, "loss": 4585.3344, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.21815869212150574, "rewards/margins": 0.04959379509091377, "rewards/rejected": -0.267752468585968, "rewards/safe_rewards": -0.20290926098823547, "rewards/unsafe_rewards": -0.21184396743774414, "step": 4580 }, { "epoch": 0.99, "learning_rate": 2.2915274031765075e-10, "logits/chosen": -1.5926859378814697, "logits/rejected": -1.7431867122650146, "logps/chosen": -207.72470092773438, "logps/rejected": -175.99783325195312, "loss": 4565.5637, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.22912725806236267, "rewards/margins": 0.03090088441967964, "rewards/rejected": -0.2600281536579132, "rewards/safe_rewards": -0.22379723191261292, "rewards/unsafe_rewards": -0.22147789597511292, "step": 4590 }, { "epoch": 0.99, "learning_rate": 1.558089141198149e-10, "logits/chosen": -1.5660661458969116, "logits/rejected": -1.6870187520980835, "logps/chosen": -194.3837432861328, "logps/rejected": -174.43838500976562, "loss": 4484.2051, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.21330666542053223, "rewards/margins": 0.04991989955306053, "rewards/rejected": -0.26322656869888306, "rewards/safe_rewards": -0.20177456736564636, "rewards/unsafe_rewards": -0.21710829436779022, "step": 4600 }, { "epoch": 0.99, "learning_rate": 9.656443178646111e-11, "logits/chosen": -1.5467939376831055, "logits/rejected": -1.7141246795654297, "logps/chosen": -207.8561553955078, "logps/rejected": -173.6125030517578, "loss": 4294.5969, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21252372860908508, "rewards/margins": 0.05171266198158264, "rewards/rejected": -0.2642363905906677, "rewards/safe_rewards": -0.21984831988811493, "rewards/unsafe_rewards": -0.2205418050289154, "step": 4610 }, { "epoch": 0.99, "learning_rate": 5.142263663460156e-11, "logits/chosen": -1.5189037322998047, "logits/rejected": -1.6757428646087646, "logps/chosen": -212.2366180419922, "logps/rejected": -178.4267578125, "loss": 4388.1742, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.22604885697364807, "rewards/margins": 0.027724748477339745, "rewards/rejected": -0.25377362966537476, "rewards/safe_rewards": -0.20852234959602356, "rewards/unsafe_rewards": -0.23138685524463654, "step": 4620 }, { "epoch": 1.0, "learning_rate": 2.038607613066845e-11, "logits/chosen": -1.5244616270065308, "logits/rejected": -1.6479650735855103, "logps/chosen": -210.43038940429688, "logps/rejected": -181.93429565429688, "loss": 4359.9695, "rewards/accuracies": 0.65625, "rewards/chosen": -0.2269216775894165, "rewards/margins": 0.04017435386776924, "rewards/rejected": -0.26709604263305664, "rewards/safe_rewards": -0.22515110671520233, "rewards/unsafe_rewards": -0.22205443680286407, "step": 4630 }, { "epoch": 1.0, "learning_rate": 3.456501746823548e-12, "logits/chosen": -1.548797845840454, "logits/rejected": -1.6843984127044678, "logps/chosen": -207.6051483154297, "logps/rejected": -182.41134643554688, "loss": 4301.7863, "rewards/accuracies": 0.71875, "rewards/chosen": -0.21986225247383118, "rewards/margins": 0.059041671454906464, "rewards/rejected": -0.27890393137931824, "rewards/safe_rewards": -0.20884303748607635, "rewards/unsafe_rewards": -0.23513726890087128, "step": 4640 }, { "epoch": 1.0, "step": 4647, "total_flos": 0.0, "train_loss": 4602.791315297503, "train_runtime": 29489.6101, "train_samples_per_second": 10.085, "train_steps_per_second": 0.158 } ], "logging_steps": 10, "max_steps": 4647, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }