diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3060 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9995965030262273, + "eval_steps": 500, + "global_step": 1858, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.6881720430107528e-09, + "logits/chosen": -2.5808520317077637, + "logits/rejected": -2.0101242065429688, + "logps/chosen": -299.3489990234375, + "logps/rejected": -186.63014221191406, + "loss": 57500.0, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "rewards/safe_rewards": 0.0, + "rewards/unsafe_rewards": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 2.6881720430107527e-08, + "logits/chosen": -2.3875465393066406, + "logits/rejected": -2.228888988494873, + "logps/chosen": -201.83912658691406, + "logps/rejected": -189.46728515625, + "loss": 62427.4167, + "rewards/accuracies": 0.4097222089767456, + "rewards/chosen": -4.808326775673777e-05, + "rewards/margins": -0.0001756605488481, + "rewards/rejected": 0.0001275773101951927, + "rewards/safe_rewards": 9.244734974345192e-05, + "rewards/unsafe_rewards": -0.00018861386342905462, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5.3763440860215054e-08, + "logits/chosen": -2.348914861679077, + "logits/rejected": -2.053725481033325, + "logps/chosen": -226.295166015625, + "logps/rejected": -181.17727661132812, + "loss": 63344.4, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": 5.038898598286323e-05, + "rewards/margins": 0.00013641553232446313, + "rewards/rejected": -8.602657908340916e-05, + "rewards/safe_rewards": 0.00015285788686014712, + "rewards/unsafe_rewards": -5.207990761846304e-05, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 8.064516129032257e-08, + "logits/chosen": -2.340607166290283, + "logits/rejected": -2.1460487842559814, + "logps/chosen": -215.04421997070312, + "logps/rejected": -189.28939819335938, + "loss": 62107.7625, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": 0.00013389784726314247, + "rewards/margins": 0.00038347922964021564, + "rewards/rejected": -0.00024958146968856454, + "rewards/safe_rewards": -1.909069214889314e-05, + "rewards/unsafe_rewards": 0.0002868864103220403, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 1.0752688172043011e-07, + "logits/chosen": -2.2774696350097656, + "logits/rejected": -1.9751331806182861, + "logps/chosen": -180.73512268066406, + "logps/rejected": -173.91610717773438, + "loss": 62107.2312, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5421304624396726e-06, + "rewards/margins": 0.0015016502002254128, + "rewards/rejected": -0.0015031921211630106, + "rewards/safe_rewards": 0.0006858192500658333, + "rewards/unsafe_rewards": -0.0006889032083563507, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 1.3440860215053762e-07, + "logits/chosen": -2.405261516571045, + "logits/rejected": -2.0346927642822266, + "logps/chosen": -209.60342407226562, + "logps/rejected": -167.78030395507812, + "loss": 62543.55, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": 0.000747154001146555, + "rewards/margins": 0.0053098187781870365, + "rewards/rejected": -0.004562664777040482, + "rewards/safe_rewards": 0.000191622442798689, + "rewards/unsafe_rewards": 0.0013026855885982513, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 1.6129032258064515e-07, + "logits/chosen": -2.3338699340820312, + "logits/rejected": -2.1591312885284424, + "logps/chosen": -186.08302307128906, + "logps/rejected": -185.45318603515625, + "loss": 62150.3625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.004001402761787176, + "rewards/margins": 0.0031907681841403246, + "rewards/rejected": -0.007192172110080719, + "rewards/safe_rewards": -0.003774217562749982, + "rewards/unsafe_rewards": -0.004228588659316301, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 1.8817204301075268e-07, + "logits/chosen": -2.336559295654297, + "logits/rejected": -2.085585117340088, + "logps/chosen": -203.3507537841797, + "logps/rejected": -185.43138122558594, + "loss": 60588.0625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.011643586680293083, + "rewards/margins": 0.008454794064164162, + "rewards/rejected": -0.020098382607102394, + "rewards/safe_rewards": -0.012982705608010292, + "rewards/unsafe_rewards": -0.010304470546543598, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 2.1505376344086022e-07, + "logits/chosen": -2.340780019760132, + "logits/rejected": -2.1255030632019043, + "logps/chosen": -223.27658081054688, + "logps/rejected": -198.85971069335938, + "loss": 60164.9062, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.03788214921951294, + "rewards/margins": 0.01990131102502346, + "rewards/rejected": -0.05778346210718155, + "rewards/safe_rewards": -0.04109570384025574, + "rewards/unsafe_rewards": -0.03466860204935074, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 2.4193548387096775e-07, + "logits/chosen": -2.328007698059082, + "logits/rejected": -2.131610155105591, + "logps/chosen": -215.45474243164062, + "logps/rejected": -177.22286987304688, + "loss": 61245.3187, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09539501368999481, + "rewards/margins": 0.0335100032389164, + "rewards/rejected": -0.1289050281047821, + "rewards/safe_rewards": -0.09368891268968582, + "rewards/unsafe_rewards": -0.0971011146903038, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 2.6881720430107523e-07, + "logits/chosen": -2.304769515991211, + "logits/rejected": -2.107332944869995, + "logps/chosen": -206.4906768798828, + "logps/rejected": -182.556884765625, + "loss": 60637.025, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10412251949310303, + "rewards/margins": 0.04963406175374985, + "rewards/rejected": -0.15375658869743347, + "rewards/safe_rewards": -0.1037391871213913, + "rewards/unsafe_rewards": -0.10450585931539536, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 2.956989247311828e-07, + "logits/chosen": -2.2178149223327637, + "logits/rejected": -1.9697818756103516, + "logps/chosen": -223.41415405273438, + "logps/rejected": -190.9380645751953, + "loss": 61145.6875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.11467987298965454, + "rewards/margins": 0.07488114386796951, + "rewards/rejected": -0.18956100940704346, + "rewards/safe_rewards": -0.11105962097644806, + "rewards/unsafe_rewards": -0.11830013990402222, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 3.225806451612903e-07, + "logits/chosen": -2.2467641830444336, + "logits/rejected": -1.8920090198516846, + "logps/chosen": -226.39306640625, + "logps/rejected": -180.3076629638672, + "loss": 58378.2, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.1514355093240738, + "rewards/margins": 0.12331793457269669, + "rewards/rejected": -0.2747534513473511, + "rewards/safe_rewards": -0.14251373708248138, + "rewards/unsafe_rewards": -0.1603572815656662, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 3.4946236559139783e-07, + "logits/chosen": -2.191040515899658, + "logits/rejected": -1.9591535329818726, + "logps/chosen": -236.45516967773438, + "logps/rejected": -197.9637908935547, + "loss": 55939.4875, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.22272026538848877, + "rewards/margins": 0.13779091835021973, + "rewards/rejected": -0.3605111539363861, + "rewards/safe_rewards": -0.16373077034950256, + "rewards/unsafe_rewards": -0.2817097306251526, + "step": 130 + }, + { + "epoch": 0.08, + "learning_rate": 3.7634408602150537e-07, + "logits/chosen": -2.2190985679626465, + "logits/rejected": -1.8829628229141235, + "logps/chosen": -240.91574096679688, + "logps/rejected": -222.6839141845703, + "loss": 54515.35, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -0.29526057839393616, + "rewards/margins": 0.1311914026737213, + "rewards/rejected": -0.42645198106765747, + "rewards/safe_rewards": -0.2800997495651245, + "rewards/unsafe_rewards": -0.3104214072227478, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.0322580645161285e-07, + "logits/chosen": -2.1150035858154297, + "logits/rejected": -1.8470131158828735, + "logps/chosen": -255.7540740966797, + "logps/rejected": -230.04983520507812, + "loss": 54050.75, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.48809370398521423, + "rewards/margins": 0.11529085785150528, + "rewards/rejected": -0.6033845543861389, + "rewards/safe_rewards": -0.48700815439224243, + "rewards/unsafe_rewards": -0.48917922377586365, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.3010752688172043e-07, + "logits/chosen": -2.0716464519500732, + "logits/rejected": -1.737754464149475, + "logps/chosen": -251.8451385498047, + "logps/rejected": -239.3789520263672, + "loss": 56080.6813, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.3164382874965668, + "rewards/margins": 0.1803792268037796, + "rewards/rejected": -0.4968174993991852, + "rewards/safe_rewards": -0.2856782376766205, + "rewards/unsafe_rewards": -0.34719833731651306, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.569892473118279e-07, + "logits/chosen": -2.078756093978882, + "logits/rejected": -1.8125765323638916, + "logps/chosen": -263.3371887207031, + "logps/rejected": -232.22787475585938, + "loss": 55167.325, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.4896107316017151, + "rewards/margins": 0.1606103926897049, + "rewards/rejected": -0.6502211689949036, + "rewards/safe_rewards": -0.46941304206848145, + "rewards/unsafe_rewards": -0.5098084211349487, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.838709677419355e-07, + "logits/chosen": -2.0903422832489014, + "logits/rejected": -1.7388015985488892, + "logps/chosen": -236.2480926513672, + "logps/rejected": -234.4490203857422, + "loss": 54006.2625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.44280552864074707, + "rewards/margins": 0.1890893578529358, + "rewards/rejected": -0.6318949460983276, + "rewards/safe_rewards": -0.4542728066444397, + "rewards/unsafe_rewards": -0.43133825063705444, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.999929391798331e-07, + "logits/chosen": -2.1477155685424805, + "logits/rejected": -1.7704684734344482, + "logps/chosen": -222.17318725585938, + "logps/rejected": -216.13729858398438, + "loss": 53827.3, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.27849894762039185, + "rewards/margins": 0.21106910705566406, + "rewards/rejected": -0.4895680844783783, + "rewards/safe_rewards": -0.2899102568626404, + "rewards/unsafe_rewards": -0.2670876979827881, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9991350953333e-07, + "logits/chosen": -2.0354604721069336, + "logits/rejected": -1.7123792171478271, + "logps/chosen": -266.3116149902344, + "logps/rejected": -268.6708984375, + "loss": 51894.3187, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.4659655690193176, + "rewards/margins": 0.20811092853546143, + "rewards/rejected": -0.6740764379501343, + "rewards/safe_rewards": -0.45449018478393555, + "rewards/unsafe_rewards": -0.47744083404541016, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.997458523498236e-07, + "logits/chosen": -2.1476428508758545, + "logits/rejected": -1.871840476989746, + "logps/chosen": -232.63418579101562, + "logps/rejected": -217.3137664794922, + "loss": 51921.0094, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.4048038125038147, + "rewards/margins": 0.1731196939945221, + "rewards/rejected": -0.5779234766960144, + "rewards/safe_rewards": -0.3647334575653076, + "rewards/unsafe_rewards": -0.44487419724464417, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.99490026817712e-07, + "logits/chosen": -2.1657705307006836, + "logits/rejected": -1.8895782232284546, + "logps/chosen": -232.4489288330078, + "logps/rejected": -237.7399139404297, + "loss": 52491.4437, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.38074105978012085, + "rewards/margins": 0.30026155710220337, + "rewards/rejected": -0.6810026168823242, + "rewards/safe_rewards": -0.35569635033607483, + "rewards/unsafe_rewards": -0.4057857394218445, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.991461232516674e-07, + "logits/chosen": -2.1992573738098145, + "logits/rejected": -1.8821046352386475, + "logps/chosen": -271.42498779296875, + "logps/rejected": -262.3030700683594, + "loss": 53356.15, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.41343992948532104, + "rewards/margins": 0.2318854033946991, + "rewards/rejected": -0.6453253030776978, + "rewards/safe_rewards": -0.42482924461364746, + "rewards/unsafe_rewards": -0.4020506739616394, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.98714263060751e-07, + "logits/chosen": -2.2992846965789795, + "logits/rejected": -1.9405317306518555, + "logps/chosen": -224.5162811279297, + "logps/rejected": -212.8382110595703, + "loss": 52005.075, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.36104291677474976, + "rewards/margins": 0.3008078336715698, + "rewards/rejected": -0.6618508100509644, + "rewards/safe_rewards": -0.36334308981895447, + "rewards/unsafe_rewards": -0.35874274373054504, + "step": 240 + }, + { + "epoch": 0.13, + "learning_rate": 4.98194598705552e-07, + "logits/chosen": -2.174473285675049, + "logits/rejected": -1.980419397354126, + "logps/chosen": -275.9315490722656, + "logps/rejected": -265.8156433105469, + "loss": 55769.3625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.862021803855896, + "rewards/margins": 0.12291105091571808, + "rewards/rejected": -0.9849328994750977, + "rewards/safe_rewards": -0.8367234468460083, + "rewards/unsafe_rewards": -0.887320339679718, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.975873136443648e-07, + "logits/chosen": -2.311676263809204, + "logits/rejected": -2.0300040245056152, + "logps/chosen": -285.49859619140625, + "logps/rejected": -277.3318176269531, + "loss": 49631.1438, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.5445571541786194, + "rewards/margins": 0.2634980380535126, + "rewards/rejected": -0.8080552220344543, + "rewards/safe_rewards": -0.6137005090713501, + "rewards/unsafe_rewards": -0.47541379928588867, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.968926222684212e-07, + "logits/chosen": -2.2334067821502686, + "logits/rejected": -2.0385944843292236, + "logps/chosen": -244.4452667236328, + "logps/rejected": -255.02474975585938, + "loss": 49048.6625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.44241613149642944, + "rewards/margins": 0.2979922890663147, + "rewards/rejected": -0.7404084801673889, + "rewards/safe_rewards": -0.44607001543045044, + "rewards/unsafe_rewards": -0.4387623369693756, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.961107698262044e-07, + "logits/chosen": -2.1260902881622314, + "logits/rejected": -1.8227698802947998, + "logps/chosen": -310.117919921875, + "logps/rejected": -296.1424865722656, + "loss": 51637.0563, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.918570876121521, + "rewards/margins": 0.23532792925834656, + "rewards/rejected": -1.15389883518219, + "rewards/safe_rewards": -0.9110817909240723, + "rewards/unsafe_rewards": -0.926059901714325, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.952420323368673e-07, + "logits/chosen": -2.0757288932800293, + "logits/rejected": -1.8892370462417603, + "logps/chosen": -294.834716796875, + "logps/rejected": -308.68902587890625, + "loss": 53392.1125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.034294605255127, + "rewards/margins": 0.24042804539203644, + "rewards/rejected": -1.2747225761413574, + "rewards/safe_rewards": -1.0596760511398315, + "rewards/unsafe_rewards": -1.0089129209518433, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.942867164927899e-07, + "logits/chosen": -2.0641281604766846, + "logits/rejected": -1.8295459747314453, + "logps/chosen": -263.4281005859375, + "logps/rejected": -258.68975830078125, + "loss": 54437.7188, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -0.6114200353622437, + "rewards/margins": 0.2835775315761566, + "rewards/rejected": -0.8949977159500122, + "rewards/safe_rewards": -0.606993556022644, + "rewards/unsafe_rewards": -0.6158466339111328, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.932451595513062e-07, + "logits/chosen": -2.1346993446350098, + "logits/rejected": -1.8092772960662842, + "logps/chosen": -278.9120178222656, + "logps/rejected": -281.7663879394531, + "loss": 46478.9031, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.5994301438331604, + "rewards/margins": 0.3355178236961365, + "rewards/rejected": -0.9349479675292969, + "rewards/safe_rewards": -0.6182785630226135, + "rewards/unsafe_rewards": -0.580581784248352, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.921177292156419e-07, + "logits/chosen": -2.1379427909851074, + "logits/rejected": -1.766617774963379, + "logps/chosen": -281.5938415527344, + "logps/rejected": -302.4753112792969, + "loss": 46446.8469, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.7557575106620789, + "rewards/margins": 0.4528188705444336, + "rewards/rejected": -1.2085764408111572, + "rewards/safe_rewards": -0.7365028858184814, + "rewards/unsafe_rewards": -0.7750122547149658, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.909048235051033e-07, + "logits/chosen": -1.9689319133758545, + "logits/rejected": -1.7293027639389038, + "logps/chosen": -270.5177001953125, + "logps/rejected": -287.52191162109375, + "loss": 49627.15, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.7110702395439148, + "rewards/margins": 0.33747392892837524, + "rewards/rejected": -1.04854416847229, + "rewards/safe_rewards": -0.6978198885917664, + "rewards/unsafe_rewards": -0.7243207097053528, + "step": 330 + }, + { + "epoch": 0.18, + "learning_rate": 4.896068706145631e-07, + "logits/chosen": -1.9467958211898804, + "logits/rejected": -1.6004358530044556, + "logps/chosen": -312.41693115234375, + "logps/rejected": -292.352294921875, + "loss": 51177.5687, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.0172303915023804, + "rewards/margins": 0.2995308041572571, + "rewards/rejected": -1.3167612552642822, + "rewards/safe_rewards": -1.009489893913269, + "rewards/unsafe_rewards": -1.0249707698822021, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.882243287632946e-07, + "logits/chosen": -1.9962276220321655, + "logits/rejected": -1.6522926092147827, + "logps/chosen": -299.3488464355469, + "logps/rejected": -314.05908203125, + "loss": 49325.6531, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.063424825668335, + "rewards/margins": 0.2652275562286377, + "rewards/rejected": -1.3286525011062622, + "rewards/safe_rewards": -1.0665223598480225, + "rewards/unsafe_rewards": -1.0603272914886475, + "step": 350 + }, + { + "epoch": 0.19, + "learning_rate": 4.867576860332048e-07, + "logits/chosen": -1.9703223705291748, + "logits/rejected": -1.6661205291748047, + "logps/chosen": -252.60049438476562, + "logps/rejected": -288.3544921875, + "loss": 46350.075, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.8054389953613281, + "rewards/margins": 0.3875662684440613, + "rewards/rejected": -1.1930052042007446, + "rewards/safe_rewards": -0.8658061027526855, + "rewards/unsafe_rewards": -0.7450717687606812, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.85207460196526e-07, + "logits/chosen": -1.9451444149017334, + "logits/rejected": -1.6176040172576904, + "logps/chosen": -318.0166931152344, + "logps/rejected": -335.5330505371094, + "loss": 49281.9, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.1363418102264404, + "rewards/margins": 0.3690471053123474, + "rewards/rejected": -1.505388855934143, + "rewards/safe_rewards": -1.1467530727386475, + "rewards/unsafe_rewards": -1.1259304285049438, + "step": 370 + }, + { + "epoch": 0.2, + "learning_rate": 4.835741985330259e-07, + "logits/chosen": -1.9350674152374268, + "logits/rejected": -1.5942466259002686, + "logps/chosen": -332.36407470703125, + "logps/rejected": -336.30780029296875, + "loss": 48804.5, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3155324459075928, + "rewards/margins": 0.32836776971817017, + "rewards/rejected": -1.6439001560211182, + "rewards/safe_rewards": -1.2734438180923462, + "rewards/unsafe_rewards": -1.3576210737228394, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.818584776367992e-07, + "logits/chosen": -1.7757905721664429, + "logits/rejected": -1.5356546640396118, + "logps/chosen": -314.65533447265625, + "logps/rejected": -340.2844543457031, + "loss": 49077.6844, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1075072288513184, + "rewards/margins": 0.3752915859222412, + "rewards/rejected": -1.48279869556427, + "rewards/safe_rewards": -1.135854959487915, + "rewards/unsafe_rewards": -1.0791594982147217, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.800609032127122e-07, + "logits/chosen": -1.7566020488739014, + "logits/rejected": -1.3898851871490479, + "logps/chosen": -321.32037353515625, + "logps/rejected": -315.85614013671875, + "loss": 51006.1062, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1577603816986084, + "rewards/margins": 0.31263625621795654, + "rewards/rejected": -1.470396637916565, + "rewards/safe_rewards": -1.115960955619812, + "rewards/unsafe_rewards": -1.1995596885681152, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.78182109862569e-07, + "logits/chosen": -1.6898117065429688, + "logits/rejected": -1.5237857103347778, + "logps/chosen": -274.32318115234375, + "logps/rejected": -287.07659912109375, + "loss": 53293.7188, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.8292277455329895, + "rewards/margins": 0.24210748076438904, + "rewards/rejected": -1.0713351964950562, + "rewards/safe_rewards": -0.7710054516792297, + "rewards/unsafe_rewards": -0.887450098991394, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.7622276086107677e-07, + "logits/chosen": -1.534034013748169, + "logits/rejected": -1.0624725818634033, + "logps/chosen": -280.20538330078125, + "logps/rejected": -290.55352783203125, + "loss": 48796.6062, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.6991214752197266, + "rewards/margins": 0.34615251421928406, + "rewards/rejected": -1.045274019241333, + "rewards/safe_rewards": -0.7316769361495972, + "rewards/unsafe_rewards": -0.666566014289856, + "step": 420 + }, + { + "epoch": 0.23, + "learning_rate": 4.741835479216879e-07, + "logits/chosen": -1.1184804439544678, + "logits/rejected": -0.3043194115161896, + "logps/chosen": -342.1807556152344, + "logps/rejected": -334.1106872558594, + "loss": 48577.3688, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.9531019330024719, + "rewards/margins": 0.4116780757904053, + "rewards/rejected": -1.3647799491882324, + "rewards/safe_rewards": -1.0027388334274292, + "rewards/unsafe_rewards": -0.9034649133682251, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.720651909524036e-07, + "logits/chosen": -0.9799094200134277, + "logits/rejected": -0.3208684027194977, + "logps/chosen": -285.3372802734375, + "logps/rejected": -289.9998474121094, + "loss": 49106.4719, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8926190137863159, + "rewards/margins": 0.35036972165107727, + "rewards/rejected": -1.2429888248443604, + "rewards/safe_rewards": -0.9608098268508911, + "rewards/unsafe_rewards": -0.824428379535675, + "step": 440 + }, + { + "epoch": 0.24, + "learning_rate": 4.698684378016222e-07, + "logits/chosen": -0.904266357421875, + "logits/rejected": -0.32866841554641724, + "logps/chosen": -281.690185546875, + "logps/rejected": -301.1679382324219, + "loss": 49231.2063, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.9436728358268738, + "rewards/margins": 0.3516644537448883, + "rewards/rejected": -1.295337200164795, + "rewards/safe_rewards": -0.9335809946060181, + "rewards/unsafe_rewards": -0.9537646174430847, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.675940639941256e-07, + "logits/chosen": -0.9592872858047485, + "logits/rejected": -0.25786834955215454, + "logps/chosen": -301.5562744140625, + "logps/rejected": -315.85064697265625, + "loss": 47179.0125, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.9686962366104126, + "rewards/margins": 0.4161023199558258, + "rewards/rejected": -1.384798288345337, + "rewards/safe_rewards": -0.9365630149841309, + "rewards/unsafe_rewards": -1.0008293390274048, + "step": 460 + }, + { + "epoch": 0.25, + "learning_rate": 4.6524287245729286e-07, + "logits/chosen": -0.8068207502365112, + "logits/rejected": -0.3391101062297821, + "logps/chosen": -278.659912109375, + "logps/rejected": -285.12371826171875, + "loss": 47751.7312, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.8405793309211731, + "rewards/margins": 0.3460259437561035, + "rewards/rejected": -1.1866052150726318, + "rewards/safe_rewards": -0.9098204374313354, + "rewards/unsafe_rewards": -0.7713381052017212, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.628156932376418e-07, + "logits/chosen": -0.7944966554641724, + "logits/rejected": 0.08994200825691223, + "logps/chosen": -282.6773376464844, + "logps/rejected": -281.9779052734375, + "loss": 48467.7469, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.8880979418754578, + "rewards/margins": 0.4183935225009918, + "rewards/rejected": -1.3064913749694824, + "rewards/safe_rewards": -0.9278827905654907, + "rewards/unsafe_rewards": -0.8483129739761353, + "step": 480 + }, + { + "epoch": 0.26, + "learning_rate": 4.603133832077953e-07, + "logits/chosen": -0.8938484191894531, + "logits/rejected": -0.33665716648101807, + "logps/chosen": -326.38067626953125, + "logps/rejected": -354.91009521484375, + "loss": 46419.6, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.9370136260986328, + "rewards/margins": 0.46655893325805664, + "rewards/rejected": -1.403572678565979, + "rewards/safe_rewards": -0.9644339680671692, + "rewards/unsafe_rewards": -0.909593403339386, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.5773682576397776e-07, + "logits/chosen": -1.2619149684906006, + "logits/rejected": -0.8411065340042114, + "logps/chosen": -264.24420166015625, + "logps/rejected": -272.22503662109375, + "loss": 48290.35, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6434648036956787, + "rewards/margins": 0.3437257707118988, + "rewards/rejected": -0.9871906042098999, + "rewards/safe_rewards": -0.6787140369415283, + "rewards/unsafe_rewards": -0.6082155704498291, + "step": 500 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -0.26756808161735535, + "eval_logits/rejected": 0.5179950594902039, + "eval_logps/chosen": -217.2423095703125, + "eval_logps/rejected": -213.39332580566406, + "eval_loss": 18207.427734375, + "eval_rewards/accuracies": 0.7307599186897278, + "eval_rewards/chosen": -0.8680341243743896, + "eval_rewards/margins": 0.3411865532398224, + "eval_rewards/rejected": -1.2092207670211792, + "eval_rewards/safe_rewards": -0.8647845387458801, + "eval_rewards/unsafe_rewards": -0.8682350516319275, + "eval_runtime": 1040.2175, + "eval_samples_per_second": 31.766, + "eval_steps_per_second": 0.993, + "step": 500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5508693051414774e-07, + "logits/chosen": -1.2905457019805908, + "logits/rejected": -0.8728705644607544, + "logps/chosen": -265.3171691894531, + "logps/rejected": -283.68438720703125, + "loss": 47170.375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.6896411776542664, + "rewards/margins": 0.3866831660270691, + "rewards/rejected": -1.0763243436813354, + "rewards/safe_rewards": -0.676253616809845, + "rewards/unsafe_rewards": -0.7030289173126221, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.52364632956877e-07, + "logits/chosen": -1.2021175622940063, + "logits/rejected": -0.6775351166725159, + "logps/chosen": -305.79876708984375, + "logps/rejected": -284.25921630859375, + "loss": 52579.1062, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.8966010212898254, + "rewards/margins": 0.2606201767921448, + "rewards/rejected": -1.1572210788726807, + "rewards/safe_rewards": -0.8755797147750854, + "rewards/unsafe_rewards": -0.9176222085952759, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.4957089415108895e-07, + "logits/chosen": -0.753202497959137, + "logits/rejected": -0.1237998977303505, + "logps/chosen": -294.72796630859375, + "logps/rejected": -330.95001220703125, + "loss": 46449.4938, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -0.9983027577400208, + "rewards/margins": 0.419041246175766, + "rewards/rejected": -1.4173439741134644, + "rewards/safe_rewards": -0.9940195083618164, + "rewards/unsafe_rewards": -1.002585768699646, + "step": 530 + }, + { + "epoch": 0.29, + "learning_rate": 4.467067003767745e-07, + "logits/chosen": -0.48180675506591797, + "logits/rejected": 0.48253804445266724, + "logps/chosen": -303.23883056640625, + "logps/rejected": -324.11090087890625, + "loss": 49649.5469, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1032171249389648, + "rewards/margins": 0.4738823473453522, + "rewards/rejected": -1.5770995616912842, + "rewards/safe_rewards": -1.143731713294983, + "rewards/unsafe_rewards": -1.0627026557922363, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.437730627868027e-07, + "logits/chosen": -0.40449410676956177, + "logits/rejected": 0.6486467719078064, + "logps/chosen": -285.4818420410156, + "logps/rejected": -304.01727294921875, + "loss": 44164.2, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -1.0856578350067139, + "rewards/margins": 0.49450111389160156, + "rewards/rejected": -1.5801591873168945, + "rewards/safe_rewards": -1.1111690998077393, + "rewards/unsafe_rewards": -1.0601468086242676, + "step": 550 + }, + { + "epoch": 0.3, + "learning_rate": 4.4077101704995163e-07, + "logits/chosen": -0.2658771872520447, + "logits/rejected": 0.5808793902397156, + "logps/chosen": -303.7386474609375, + "logps/rejected": -322.9274597167969, + "loss": 47530.7875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1163694858551025, + "rewards/margins": 0.37899577617645264, + "rewards/rejected": -1.4953652620315552, + "rewards/safe_rewards": -1.1057684421539307, + "rewards/unsafe_rewards": -1.1269704103469849, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.3770162298528356e-07, + "logits/chosen": 0.08729227632284164, + "logits/rejected": 1.0574349164962769, + "logps/chosen": -339.82366943359375, + "logps/rejected": -340.77117919921875, + "loss": 48790.9437, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.4495482444763184, + "rewards/margins": 0.41773146390914917, + "rewards/rejected": -1.8672796487808228, + "rewards/safe_rewards": -1.3902016878128052, + "rewards/unsafe_rewards": -1.508894681930542, + "step": 570 + }, + { + "epoch": 0.31, + "learning_rate": 4.3456596418799476e-07, + "logits/chosen": 0.07822632789611816, + "logits/rejected": 0.888980507850647, + "logps/chosen": -341.161865234375, + "logps/rejected": -346.3221740722656, + "loss": 44718.7562, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3741071224212646, + "rewards/margins": 0.3688550591468811, + "rewards/rejected": -1.7429622411727905, + "rewards/safe_rewards": -1.338965654373169, + "rewards/unsafe_rewards": -1.4092485904693604, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.313651476468715e-07, + "logits/chosen": -0.3907620906829834, + "logits/rejected": 0.42592954635620117, + "logps/chosen": -304.7808837890625, + "logps/rejected": -318.17120361328125, + "loss": 45170.7406, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.082128882408142, + "rewards/margins": 0.3731518089771271, + "rewards/rejected": -1.4552809000015259, + "rewards/safe_rewards": -1.146651029586792, + "rewards/unsafe_rewards": -1.0176069736480713, + "step": 590 + }, + { + "epoch": 0.32, + "learning_rate": 4.2810030335348693e-07, + "logits/chosen": -0.02207891270518303, + "logits/rejected": 1.0099096298217773, + "logps/chosen": -324.4931640625, + "logps/rejected": -322.2471618652344, + "loss": 46926.425, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.179164171218872, + "rewards/margins": 0.39374762773513794, + "rewards/rejected": -1.5729118585586548, + "rewards/safe_rewards": -1.1528512239456177, + "rewards/unsafe_rewards": -1.205476999282837, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.2477258390327806e-07, + "logits/chosen": 0.07391710579395294, + "logits/rejected": 1.108780026435852, + "logps/chosen": -301.9632263183594, + "logps/rejected": -344.14923095703125, + "loss": 44995.3313, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.1695501804351807, + "rewards/margins": 0.5345418453216553, + "rewards/rejected": -1.704092025756836, + "rewards/safe_rewards": -1.220637559890747, + "rewards/unsafe_rewards": -1.1184630393981934, + "step": 610 + }, + { + "epoch": 0.33, + "learning_rate": 4.2138316408864197e-07, + "logits/chosen": -0.31665921211242676, + "logits/rejected": 0.8195618391036987, + "logps/chosen": -297.1183166503906, + "logps/rejected": -324.0538635253906, + "loss": 42007.7375, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.9888391494750977, + "rewards/margins": 0.6273995637893677, + "rewards/rejected": -1.6162388324737549, + "rewards/safe_rewards": -0.9751203656196594, + "rewards/unsafe_rewards": -1.0025577545166016, + "step": 620 + }, + { + "epoch": 0.34, + "learning_rate": 4.179332404841962e-07, + "logits/chosen": -0.11831046640872955, + "logits/rejected": 1.0501253604888916, + "logps/chosen": -330.7760314941406, + "logps/rejected": -345.50927734375, + "loss": 45359.3, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.2064815759658813, + "rewards/margins": 0.532605767250061, + "rewards/rejected": -1.7390873432159424, + "rewards/safe_rewards": -1.2117804288864136, + "rewards/unsafe_rewards": -1.20118248462677, + "step": 630 + }, + { + "epoch": 0.34, + "learning_rate": 4.1442403102434954e-07, + "logits/chosen": -0.2538089454174042, + "logits/rejected": 0.7927336692810059, + "logps/chosen": -339.2408142089844, + "logps/rejected": -348.7007141113281, + "loss": 47515.475, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.19259512424469, + "rewards/margins": 0.5071938037872314, + "rewards/rejected": -1.699789047241211, + "rewards/safe_rewards": -1.191653847694397, + "rewards/unsafe_rewards": -1.1935365200042725, + "step": 640 + }, + { + "epoch": 0.35, + "learning_rate": 4.108567745733318e-07, + "logits/chosen": -0.1774824857711792, + "logits/rejected": 0.8802785873413086, + "logps/chosen": -287.61773681640625, + "logps/rejected": -317.7408142089844, + "loss": 48423.2156, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.141042709350586, + "rewards/margins": 0.4262182116508484, + "rewards/rejected": -1.5672608613967896, + "rewards/safe_rewards": -1.161741852760315, + "rewards/unsafe_rewards": -1.1203434467315674, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.0723273048783426e-07, + "logits/chosen": -0.008861498907208443, + "logits/rejected": 1.0686566829681396, + "logps/chosen": -327.5851135253906, + "logps/rejected": -318.5889892578125, + "loss": 48325.2937, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.0329101085662842, + "rewards/margins": 0.462179034948349, + "rewards/rejected": -1.4950892925262451, + "rewards/safe_rewards": -0.9797943234443665, + "rewards/unsafe_rewards": -1.0860260725021362, + "step": 660 + }, + { + "epoch": 0.36, + "learning_rate": 4.0355317817241697e-07, + "logits/chosen": 0.34657254815101624, + "logits/rejected": 1.7308372259140015, + "logps/chosen": -340.7425231933594, + "logps/rejected": -317.3548889160156, + "loss": 47926.7281, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.1180143356323242, + "rewards/margins": 0.44145455956459045, + "rewards/rejected": -1.5594687461853027, + "rewards/safe_rewards": -1.0670055150985718, + "rewards/unsafe_rewards": -1.1690229177474976, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 3.998194166278367e-07, + "logits/chosen": 0.778851330280304, + "logits/rejected": 1.7697913646697998, + "logps/chosen": -331.94720458984375, + "logps/rejected": -338.12164306640625, + "loss": 48389.5813, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.4065760374069214, + "rewards/margins": 0.3046923279762268, + "rewards/rejected": -1.711268424987793, + "rewards/safe_rewards": -1.4281235933303833, + "rewards/unsafe_rewards": -1.385028600692749, + "step": 680 + }, + { + "epoch": 0.37, + "learning_rate": 3.9603276399245855e-07, + "logits/chosen": 0.22816288471221924, + "logits/rejected": 1.5924437046051025, + "logps/chosen": -334.93927001953125, + "logps/rejected": -337.0348205566406, + "loss": 46915.8719, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.2231535911560059, + "rewards/margins": 0.4672882556915283, + "rewards/rejected": -1.6904417276382446, + "rewards/safe_rewards": -1.2139724493026733, + "rewards/unsafe_rewards": -1.2323347330093384, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 3.9219455707691e-07, + "logits/chosen": 0.402846097946167, + "logits/rejected": 1.474202036857605, + "logps/chosen": -323.0831604003906, + "logps/rejected": -335.8956604003906, + "loss": 46262.8812, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.192866325378418, + "rewards/margins": 0.4478531777858734, + "rewards/rejected": -1.6407196521759033, + "rewards/safe_rewards": -1.1731719970703125, + "rewards/unsafe_rewards": -1.2125608921051025, + "step": 700 + }, + { + "epoch": 0.38, + "learning_rate": 3.883061508921439e-07, + "logits/chosen": 0.026843935251235962, + "logits/rejected": 0.8126258850097656, + "logps/chosen": -309.677734375, + "logps/rejected": -349.9756164550781, + "loss": 46877.5938, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.1091785430908203, + "rewards/margins": 0.3808898627758026, + "rewards/rejected": -1.4900684356689453, + "rewards/safe_rewards": -1.1227047443389893, + "rewards/unsafe_rewards": -1.0956523418426514, + "step": 710 + }, + { + "epoch": 0.39, + "learning_rate": 3.8436891817107555e-07, + "logits/chosen": -0.08887679874897003, + "logits/rejected": 0.611587643623352, + "logps/chosen": -300.66375732421875, + "logps/rejected": -328.358642578125, + "loss": 47678.8, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.1018635034561157, + "rewards/margins": 0.43183404207229614, + "rewards/rejected": -1.5336976051330566, + "rewards/safe_rewards": -1.1127148866653442, + "rewards/unsafe_rewards": -1.0910122394561768, + "step": 720 + }, + { + "epoch": 0.39, + "learning_rate": 3.8038424888396414e-07, + "logits/chosen": -0.025473903864622116, + "logits/rejected": 0.94487464427948, + "logps/chosen": -330.1134338378906, + "logps/rejected": -357.6849670410156, + "loss": 45561.3719, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.2432048320770264, + "rewards/margins": 0.47914719581604004, + "rewards/rejected": -1.7223520278930664, + "rewards/safe_rewards": -1.2225019931793213, + "rewards/unsafe_rewards": -1.263907790184021, + "step": 730 + }, + { + "epoch": 0.4, + "learning_rate": 3.763535497477079e-07, + "logits/chosen": -0.0812605619430542, + "logits/rejected": 0.9112746119499207, + "logps/chosen": -352.16680908203125, + "logps/rejected": -360.167236328125, + "loss": 46534.9469, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.4406814575195312, + "rewards/margins": 0.4508844017982483, + "rewards/rejected": -1.8915659189224243, + "rewards/safe_rewards": -1.4747087955474854, + "rewards/unsafe_rewards": -1.4066541194915771, + "step": 740 + }, + { + "epoch": 0.4, + "learning_rate": 3.7227824372922795e-07, + "logits/chosen": -0.3351050615310669, + "logits/rejected": 0.6222825050354004, + "logps/chosen": -311.34539794921875, + "logps/rejected": -328.29254150390625, + "loss": 46185.125, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2312049865722656, + "rewards/margins": 0.42659538984298706, + "rewards/rejected": -1.657800316810608, + "rewards/safe_rewards": -1.2009503841400146, + "rewards/unsafe_rewards": -1.2614593505859375, + "step": 750 + }, + { + "epoch": 0.41, + "learning_rate": 3.681597695431148e-07, + "logits/chosen": -0.4905988276004791, + "logits/rejected": 0.30257314443588257, + "logps/chosen": -296.54083251953125, + "logps/rejected": -330.9700622558594, + "loss": 46222.2188, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.9792541265487671, + "rewards/margins": 0.5448631644248962, + "rewards/rejected": -1.524117350578308, + "rewards/safe_rewards": -0.9614570736885071, + "rewards/unsafe_rewards": -0.9970510601997375, + "step": 760 + }, + { + "epoch": 0.41, + "learning_rate": 3.639995811437159e-07, + "logits/chosen": -0.6693869829177856, + "logits/rejected": 0.015661846846342087, + "logps/chosen": -304.0548400878906, + "logps/rejected": -337.1456298828125, + "loss": 46358.5219, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.052179217338562, + "rewards/margins": 0.47597724199295044, + "rewards/rejected": -1.5281565189361572, + "rewards/safe_rewards": -1.0745112895965576, + "rewards/unsafe_rewards": -1.0298470258712769, + "step": 770 + }, + { + "epoch": 0.42, + "learning_rate": 3.597991472118426e-07, + "logits/chosen": -0.6785175204277039, + "logits/rejected": 0.227290540933609, + "logps/chosen": -330.418212890625, + "logps/rejected": -342.43988037109375, + "loss": 48352.3812, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.1566883325576782, + "rewards/margins": 0.4171779155731201, + "rewards/rejected": -1.5738661289215088, + "rewards/safe_rewards": -1.199686050415039, + "rewards/unsafe_rewards": -1.1136906147003174, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 3.5555995063627836e-07, + "logits/chosen": -0.31770753860473633, + "logits/rejected": 0.5166782736778259, + "logps/chosen": -357.19879150390625, + "logps/rejected": -354.46728515625, + "loss": 46157.3844, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.2687500715255737, + "rewards/margins": 0.41667160391807556, + "rewards/rejected": -1.6854215860366821, + "rewards/safe_rewards": -1.2713624238967896, + "rewards/unsafe_rewards": -1.2661378383636475, + "step": 790 + }, + { + "epoch": 0.43, + "learning_rate": 3.512834879902715e-07, + "logits/chosen": -0.03321915119886398, + "logits/rejected": 0.9936412572860718, + "logps/chosen": -321.48822021484375, + "logps/rejected": -341.1229553222656, + "loss": 45445.1562, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.2150646448135376, + "rewards/margins": 0.4430549740791321, + "rewards/rejected": -1.658119559288025, + "rewards/safe_rewards": -1.2271901369094849, + "rewards/unsafe_rewards": -1.2029391527175903, + "step": 800 + }, + { + "epoch": 0.44, + "learning_rate": 3.4697126900319616e-07, + "logits/chosen": 0.07181496918201447, + "logits/rejected": 1.114311933517456, + "logps/chosen": -318.795166015625, + "logps/rejected": -331.4193115234375, + "loss": 47502.65, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.181797981262207, + "rewards/margins": 0.4846934378147125, + "rewards/rejected": -1.6664912700653076, + "rewards/safe_rewards": -1.114547848701477, + "rewards/unsafe_rewards": -1.249047875404358, + "step": 810 + }, + { + "epoch": 0.44, + "learning_rate": 3.426248160275693e-07, + "logits/chosen": -0.5697218179702759, + "logits/rejected": 0.40941324830055237, + "logps/chosen": -297.49658203125, + "logps/rejected": -321.25433349609375, + "loss": 47752.8094, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.9996961355209351, + "rewards/margins": 0.4591383934020996, + "rewards/rejected": -1.4588346481323242, + "rewards/safe_rewards": -1.0645678043365479, + "rewards/unsafe_rewards": -0.9348245859146118, + "step": 820 + }, + { + "epoch": 0.45, + "learning_rate": 3.3824566350161094e-07, + "logits/chosen": -0.8640801310539246, + "logits/rejected": 0.15506112575531006, + "logps/chosen": -296.4278259277344, + "logps/rejected": -300.3705139160156, + "loss": 45245.3187, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -0.8809518814086914, + "rewards/margins": 0.43903908133506775, + "rewards/rejected": -1.319991111755371, + "rewards/safe_rewards": -0.8818641901016235, + "rewards/unsafe_rewards": -0.8800395727157593, + "step": 830 + }, + { + "epoch": 0.45, + "learning_rate": 3.338353574075381e-07, + "logits/chosen": -0.12128879874944687, + "logits/rejected": 0.524516224861145, + "logps/chosen": -281.489501953125, + "logps/rejected": -304.2881164550781, + "loss": 53542.8938, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0612605810165405, + "rewards/margins": 0.3566663861274719, + "rewards/rejected": -1.4179269075393677, + "rewards/safe_rewards": -1.0678443908691406, + "rewards/unsafe_rewards": -1.05467689037323, + "step": 840 + }, + { + "epoch": 0.46, + "learning_rate": 3.2939545472578314e-07, + "logits/chosen": -0.5454502105712891, + "logits/rejected": 0.7278568744659424, + "logps/chosen": -336.9131774902344, + "logps/rejected": -332.34576416015625, + "loss": 48328.5188, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1075465679168701, + "rewards/margins": 0.37466156482696533, + "rewards/rejected": -1.482208013534546, + "rewards/safe_rewards": -1.0604884624481201, + "rewards/unsafe_rewards": -1.1546049118041992, + "step": 850 + }, + { + "epoch": 0.46, + "learning_rate": 3.2492752288532916e-07, + "logits/chosen": -0.6756476163864136, + "logits/rejected": 0.3920753598213196, + "logps/chosen": -303.96868896484375, + "logps/rejected": -309.6114196777344, + "loss": 46947.8562, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.0026493072509766, + "rewards/margins": 0.4105382561683655, + "rewards/rejected": -1.4131875038146973, + "rewards/safe_rewards": -0.9510561227798462, + "rewards/unsafe_rewards": -1.054242491722107, + "step": 860 + }, + { + "epoch": 0.47, + "learning_rate": 3.204331392103574e-07, + "logits/chosen": -0.8019376993179321, + "logits/rejected": 0.5014632940292358, + "logps/chosen": -307.8298034667969, + "logps/rejected": -302.9684753417969, + "loss": 46707.7094, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.9432118535041809, + "rewards/margins": 0.44711560010910034, + "rewards/rejected": -1.3903272151947021, + "rewards/safe_rewards": -0.9559502601623535, + "rewards/unsafe_rewards": -0.9304733276367188, + "step": 870 + }, + { + "epoch": 0.47, + "learning_rate": 3.159138903634006e-07, + "logits/chosen": 0.3557747006416321, + "logits/rejected": 1.4326626062393188, + "logps/chosen": -334.68927001953125, + "logps/rejected": -336.46722412109375, + "loss": 47447.5656, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.260571837425232, + "rewards/margins": 0.3785037100315094, + "rewards/rejected": -1.6390756368637085, + "rewards/safe_rewards": -1.2457797527313232, + "rewards/unsafe_rewards": -1.275363802909851, + "step": 880 + }, + { + "epoch": 0.48, + "learning_rate": 3.1137137178519977e-07, + "logits/chosen": 0.2839844822883606, + "logits/rejected": 1.174262285232544, + "logps/chosen": -316.9241027832031, + "logps/rejected": -353.99908447265625, + "loss": 46724.45, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.348803997039795, + "rewards/margins": 0.4625515043735504, + "rewards/rejected": -1.8113553524017334, + "rewards/safe_rewards": -1.3274753093719482, + "rewards/unsafe_rewards": -1.3701326847076416, + "step": 890 + }, + { + "epoch": 0.48, + "learning_rate": 3.068071871314626e-07, + "logits/chosen": 0.20167894661426544, + "logits/rejected": 0.933432400226593, + "logps/chosen": -307.03790283203125, + "logps/rejected": -320.7303771972656, + "loss": 46014.2531, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.2831897735595703, + "rewards/margins": 0.32940804958343506, + "rewards/rejected": -1.6125978231430054, + "rewards/safe_rewards": -1.2695814371109009, + "rewards/unsafe_rewards": -1.2967984676361084, + "step": 900 + }, + { + "epoch": 0.49, + "learning_rate": 3.022229477067205e-07, + "logits/chosen": -0.042893826961517334, + "logits/rejected": 1.0286198854446411, + "logps/chosen": -341.54608154296875, + "logps/rejected": -342.9488220214844, + "loss": 41978.7312, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.2445247173309326, + "rewards/margins": 0.48235639929771423, + "rewards/rejected": -1.7268812656402588, + "rewards/safe_rewards": -1.239620327949524, + "rewards/unsafe_rewards": -1.24942946434021, + "step": 910 + }, + { + "epoch": 0.49, + "learning_rate": 2.976202718954869e-07, + "logits/chosen": 0.36570900678634644, + "logits/rejected": 1.54874587059021, + "logps/chosen": -343.0382385253906, + "logps/rejected": -363.61077880859375, + "loss": 48903.7281, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3155945539474487, + "rewards/margins": 0.49677151441574097, + "rewards/rejected": -1.8123661279678345, + "rewards/safe_rewards": -1.3003036975860596, + "rewards/unsafe_rewards": -1.3308852910995483, + "step": 920 + }, + { + "epoch": 0.5, + "learning_rate": 2.930007845909146e-07, + "logits/chosen": 0.4161974787712097, + "logits/rejected": 1.3484485149383545, + "logps/chosen": -337.0982360839844, + "logps/rejected": -358.9637756347656, + "loss": 46673.7125, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.394789457321167, + "rewards/margins": 0.3740582764148712, + "rewards/rejected": -1.7688478231430054, + "rewards/safe_rewards": -1.4157686233520508, + "rewards/unsafe_rewards": -1.3738105297088623, + "step": 930 + }, + { + "epoch": 0.51, + "learning_rate": 2.8836611662115634e-07, + "logits/chosen": 0.32437923550605774, + "logits/rejected": 1.6094415187835693, + "logps/chosen": -348.34893798828125, + "logps/rejected": -344.3780517578125, + "loss": 47717.0219, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.2822099924087524, + "rewards/margins": 0.47993484139442444, + "rewards/rejected": -1.762144684791565, + "rewards/safe_rewards": -1.2761863470077515, + "rewards/unsafe_rewards": -1.288233757019043, + "step": 940 + }, + { + "epoch": 0.51, + "learning_rate": 2.8371790417362986e-07, + "logits/chosen": 0.524042010307312, + "logits/rejected": 1.4702574014663696, + "logps/chosen": -302.9036865234375, + "logps/rejected": -330.6932678222656, + "loss": 50530.4812, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -1.1556233167648315, + "rewards/margins": 0.3894086480140686, + "rewards/rejected": -1.5450319051742554, + "rewards/safe_rewards": -1.1308680772781372, + "rewards/unsafe_rewards": -1.1803786754608154, + "step": 950 + }, + { + "epoch": 0.52, + "learning_rate": 2.7905778821739056e-07, + "logits/chosen": 0.3164711594581604, + "logits/rejected": 1.3070744276046753, + "logps/chosen": -301.5843811035156, + "logps/rejected": -303.21160888671875, + "loss": 46752.1469, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.0681132078170776, + "rewards/margins": 0.3526977300643921, + "rewards/rejected": -1.4208109378814697, + "rewards/safe_rewards": -1.0474605560302734, + "rewards/unsafe_rewards": -1.0887658596038818, + "step": 960 + }, + { + "epoch": 0.52, + "learning_rate": 2.74387413923817e-07, + "logits/chosen": 0.8012312650680542, + "logits/rejected": 1.6655724048614502, + "logps/chosen": -349.3222961425781, + "logps/rejected": -352.2900390625, + "loss": 47361.8187, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2483861446380615, + "rewards/margins": 0.4203091263771057, + "rewards/rejected": -1.6686954498291016, + "rewards/safe_rewards": -1.2491943836212158, + "rewards/unsafe_rewards": -1.2475781440734863, + "step": 970 + }, + { + "epoch": 0.53, + "learning_rate": 2.69708430085812e-07, + "logits/chosen": 0.5219982862472534, + "logits/rejected": 2.162062883377075, + "logps/chosen": -379.4377746582031, + "logps/rejected": -381.57562255859375, + "loss": 49014.0813, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.5605828762054443, + "rewards/margins": 0.4888067841529846, + "rewards/rejected": -2.049389600753784, + "rewards/safe_rewards": -1.5090703964233398, + "rewards/unsafe_rewards": -1.612095594406128, + "step": 980 + }, + { + "epoch": 0.53, + "learning_rate": 2.6502248853572504e-07, + "logits/chosen": 0.9691184759140015, + "logits/rejected": 2.180415391921997, + "logps/chosen": -343.29315185546875, + "logps/rejected": -370.11248779296875, + "loss": 47599.0969, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.5541576147079468, + "rewards/margins": 0.524471640586853, + "rewards/rejected": -2.0786292552948, + "rewards/safe_rewards": -1.5663822889328003, + "rewards/unsafe_rewards": -1.5419328212738037, + "step": 990 + }, + { + "epoch": 0.54, + "learning_rate": 2.6033124356220325e-07, + "logits/chosen": 0.5866010189056396, + "logits/rejected": 2.015026807785034, + "logps/chosen": -332.12591552734375, + "logps/rejected": -341.545654296875, + "loss": 45912.9125, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2935163974761963, + "rewards/margins": 0.5281890034675598, + "rewards/rejected": -1.8217052221298218, + "rewards/safe_rewards": -1.2491321563720703, + "rewards/unsafe_rewards": -1.3379004001617432, + "step": 1000 + }, + { + "epoch": 0.54, + "eval_logits/chosen": 2.3433544635772705, + "eval_logits/rejected": 3.7604947090148926, + "eval_logps/chosen": -277.0185852050781, + "eval_logps/rejected": -275.06304931640625, + "eval_loss": 18076.08984375, + "eval_rewards/accuracies": 0.7393513917922974, + "eval_rewards/chosen": -1.4657968282699585, + "eval_rewards/margins": 0.360120952129364, + "eval_rewards/rejected": -1.8259178400039673, + "eval_rewards/safe_rewards": -1.4604085683822632, + "eval_rewards/unsafe_rewards": -1.4690992832183838, + "eval_runtime": 1042.5504, + "eval_samples_per_second": 31.695, + "eval_steps_per_second": 0.991, + "step": 1000 + }, + { + "epoch": 0.54, + "learning_rate": 2.55636351326173e-07, + "logits/chosen": 0.2637340724468231, + "logits/rejected": 1.3821382522583008, + "logps/chosen": -326.6502990722656, + "logps/rejected": -326.9656677246094, + "loss": 43257.1656, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": -1.2059671878814697, + "rewards/margins": 0.4677943289279938, + "rewards/rejected": -1.6737616062164307, + "rewards/safe_rewards": -1.2266099452972412, + "rewards/unsafe_rewards": -1.1853244304656982, + "step": 1010 + }, + { + "epoch": 0.55, + "learning_rate": 2.509394692761622e-07, + "logits/chosen": 0.17263159155845642, + "logits/rejected": 1.64174485206604, + "logps/chosen": -318.9146728515625, + "logps/rejected": -327.67840576171875, + "loss": 46550.0781, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.1323739290237427, + "rewards/margins": 0.4946485459804535, + "rewards/rejected": -1.6270225048065186, + "rewards/safe_rewards": -1.1417757272720337, + "rewards/unsafe_rewards": -1.1229721307754517, + "step": 1020 + }, + { + "epoch": 0.55, + "learning_rate": 2.462422555631674e-07, + "logits/chosen": 0.5569484829902649, + "logits/rejected": 1.98611581325531, + "logps/chosen": -330.1278076171875, + "logps/rejected": -324.5747375488281, + "loss": 45132.0031, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.276933193206787, + "rewards/margins": 0.4690650999546051, + "rewards/rejected": -1.7459983825683594, + "rewards/safe_rewards": -1.2884827852249146, + "rewards/unsafe_rewards": -1.2653838396072388, + "step": 1030 + }, + { + "epoch": 0.56, + "learning_rate": 2.415463684552728e-07, + "logits/chosen": 0.8780863881111145, + "logits/rejected": 1.8427845239639282, + "logps/chosen": -323.46893310546875, + "logps/rejected": -339.24609375, + "loss": 48276.4844, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.2649118900299072, + "rewards/margins": 0.45407265424728394, + "rewards/rejected": -1.7189843654632568, + "rewards/safe_rewards": -1.2602522373199463, + "rewards/unsafe_rewards": -1.269571304321289, + "step": 1040 + }, + { + "epoch": 0.56, + "learning_rate": 2.3685346575222807e-07, + "logits/chosen": 0.6186736226081848, + "logits/rejected": 2.1833815574645996, + "logps/chosen": -332.42950439453125, + "logps/rejected": -339.97943115234375, + "loss": 45714.9938, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.147948980331421, + "rewards/margins": 0.5019657015800476, + "rewards/rejected": -1.6499147415161133, + "rewards/safe_rewards": -1.165897011756897, + "rewards/unsafe_rewards": -1.1300010681152344, + "step": 1050 + }, + { + "epoch": 0.57, + "learning_rate": 2.321652042001919e-07, + "logits/chosen": 0.6462287902832031, + "logits/rejected": 2.0647530555725098, + "logps/chosen": -335.7055358886719, + "logps/rejected": -366.52435302734375, + "loss": 45951.4812, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.2451475858688354, + "rewards/margins": 0.4961511194705963, + "rewards/rejected": -1.7412986755371094, + "rewards/safe_rewards": -1.2085615396499634, + "rewards/unsafe_rewards": -1.281733512878418, + "step": 1060 + }, + { + "epoch": 0.58, + "learning_rate": 2.2748323890684662e-07, + "logits/chosen": 0.3872208595275879, + "logits/rejected": 2.07070255279541, + "logps/chosen": -335.6614685058594, + "logps/rejected": -345.56463623046875, + "loss": 45901.4969, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.2319549322128296, + "rewards/margins": 0.574022650718689, + "rewards/rejected": -1.8059775829315186, + "rewards/safe_rewards": -1.2641961574554443, + "rewards/unsafe_rewards": -1.1997138261795044, + "step": 1070 + }, + { + "epoch": 0.58, + "learning_rate": 2.2280922275709213e-07, + "logits/chosen": 0.2748260498046875, + "logits/rejected": 1.2558234930038452, + "logps/chosen": -338.2221374511719, + "logps/rejected": -350.45330810546875, + "loss": 45983.1844, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.2712537050247192, + "rewards/margins": 0.42523473501205444, + "rewards/rejected": -1.6964881420135498, + "rewards/safe_rewards": -1.2452527284622192, + "rewards/unsafe_rewards": -1.2972544431686401, + "step": 1080 + }, + { + "epoch": 0.59, + "learning_rate": 2.1814480582952375e-07, + "logits/chosen": 0.2227509766817093, + "logits/rejected": 1.3561017513275146, + "logps/chosen": -327.06036376953125, + "logps/rejected": -353.83819580078125, + "loss": 45787.4688, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.2495238780975342, + "rewards/margins": 0.4897841811180115, + "rewards/rejected": -1.7393079996109009, + "rewards/safe_rewards": -1.1957545280456543, + "rewards/unsafe_rewards": -1.3032933473587036, + "step": 1090 + }, + { + "epoch": 0.59, + "learning_rate": 2.1349163481390187e-07, + "logits/chosen": 0.14423558115959167, + "logits/rejected": 1.2194325923919678, + "logps/chosen": -319.0032653808594, + "logps/rejected": -341.0852966308594, + "loss": 46315.0375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.1849133968353271, + "rewards/margins": 0.4637610912322998, + "rewards/rejected": -1.6486746072769165, + "rewards/safe_rewards": -1.1174824237823486, + "rewards/unsafe_rewards": -1.2523443698883057, + "step": 1100 + }, + { + "epoch": 0.6, + "learning_rate": 2.0885135242981647e-07, + "logits/chosen": 0.20733359456062317, + "logits/rejected": 1.5299054384231567, + "logps/chosen": -357.74072265625, + "logps/rejected": -325.88092041015625, + "loss": 45521.4812, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2555134296417236, + "rewards/margins": 0.3978913724422455, + "rewards/rejected": -1.6534048318862915, + "rewards/safe_rewards": -1.3170197010040283, + "rewards/unsafe_rewards": -1.1940072774887085, + "step": 1110 + }, + { + "epoch": 0.6, + "learning_rate": 2.0422559684675494e-07, + "logits/chosen": 0.3540279269218445, + "logits/rejected": 1.7412567138671875, + "logps/chosen": -343.0755615234375, + "logps/rejected": -344.5616149902344, + "loss": 44749.9812, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.2586820125579834, + "rewards/margins": 0.46134939789772034, + "rewards/rejected": -1.7200313806533813, + "rewards/safe_rewards": -1.261116623878479, + "rewards/unsafe_rewards": -1.2562475204467773, + "step": 1120 + }, + { + "epoch": 0.61, + "learning_rate": 1.9961600110577457e-07, + "logits/chosen": 0.7491122484207153, + "logits/rejected": 1.9913356304168701, + "logps/chosen": -345.251708984375, + "logps/rejected": -365.1427307128906, + "loss": 47614.8313, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3633828163146973, + "rewards/margins": 0.40323466062545776, + "rewards/rejected": -1.7666175365447998, + "rewards/safe_rewards": -1.3032304048538208, + "rewards/unsafe_rewards": -1.4235353469848633, + "step": 1130 + }, + { + "epoch": 0.61, + "learning_rate": 1.950241925429867e-07, + "logits/chosen": 0.3903161585330963, + "logits/rejected": 1.8490869998931885, + "logps/chosen": -323.1323547363281, + "logps/rejected": -337.658447265625, + "loss": 46266.0656, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.2305465936660767, + "rewards/margins": 0.5337331891059875, + "rewards/rejected": -1.7642797231674194, + "rewards/safe_rewards": -1.2174155712127686, + "rewards/unsafe_rewards": -1.2436776161193848, + "step": 1140 + }, + { + "epoch": 0.62, + "learning_rate": 1.9045179221505495e-07, + "logits/chosen": 0.2251172959804535, + "logits/rejected": 1.2826286554336548, + "logps/chosen": -340.961669921875, + "logps/rejected": -347.27032470703125, + "loss": 44945.3625, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.1167148351669312, + "rewards/margins": 0.47817033529281616, + "rewards/rejected": -1.5948853492736816, + "rewards/safe_rewards": -1.0529330968856812, + "rewards/unsafe_rewards": -1.1804964542388916, + "step": 1150 + }, + { + "epoch": 0.62, + "learning_rate": 1.8590041432690893e-07, + "logits/chosen": 0.45856374502182007, + "logits/rejected": 1.3290952444076538, + "logps/chosen": -309.6344909667969, + "logps/rejected": -326.86602783203125, + "loss": 46534.3438, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.2198865413665771, + "rewards/margins": 0.3725244104862213, + "rewards/rejected": -1.592410922050476, + "rewards/safe_rewards": -1.2590606212615967, + "rewards/unsafe_rewards": -1.1807124614715576, + "step": 1160 + }, + { + "epoch": 0.63, + "learning_rate": 1.813716656618788e-07, + "logits/chosen": 0.5377748608589172, + "logits/rejected": 1.4534975290298462, + "logps/chosen": -318.2839660644531, + "logps/rejected": -336.2844543457031, + "loss": 47006.5875, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.2998250722885132, + "rewards/margins": 0.41484713554382324, + "rewards/rejected": -1.7146720886230469, + "rewards/safe_rewards": -1.2636735439300537, + "rewards/unsafe_rewards": -1.335976481437683, + "step": 1170 + }, + { + "epoch": 0.63, + "learning_rate": 1.7686714501444788e-07, + "logits/chosen": 0.26785796880722046, + "logits/rejected": 2.0348572731018066, + "logps/chosen": -345.3648376464844, + "logps/rejected": -344.6531677246094, + "loss": 48105.6312, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3300942182540894, + "rewards/margins": 0.49929314851760864, + "rewards/rejected": -1.8293873071670532, + "rewards/safe_rewards": -1.3624296188354492, + "rewards/unsafe_rewards": -1.29775869846344, + "step": 1180 + }, + { + "epoch": 0.64, + "learning_rate": 1.7238844262582768e-07, + "logits/chosen": 0.6439474821090698, + "logits/rejected": 1.2585684061050415, + "logps/chosen": -330.22271728515625, + "logps/rejected": -354.88641357421875, + "loss": 44955.6813, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.2324410676956177, + "rewards/margins": 0.40000033378601074, + "rewards/rejected": -1.6324412822723389, + "rewards/safe_rewards": -1.1928186416625977, + "rewards/unsafe_rewards": -1.2720633745193481, + "step": 1190 + }, + { + "epoch": 0.65, + "learning_rate": 1.679371396225504e-07, + "logits/chosen": 0.6228231191635132, + "logits/rejected": 2.0198254585266113, + "logps/chosen": -320.97027587890625, + "logps/rejected": -351.89324951171875, + "loss": 46898.7312, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.198693871498108, + "rewards/margins": 0.5077270269393921, + "rewards/rejected": -1.7064208984375, + "rewards/safe_rewards": -1.1311042308807373, + "rewards/unsafe_rewards": -1.266283392906189, + "step": 1200 + }, + { + "epoch": 0.65, + "learning_rate": 1.6351480745828096e-07, + "logits/chosen": 0.5048326253890991, + "logits/rejected": 1.7347217798233032, + "logps/chosen": -323.8348388671875, + "logps/rejected": -339.9756774902344, + "loss": 42753.7312, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2289304733276367, + "rewards/margins": 0.4678264260292053, + "rewards/rejected": -1.6967569589614868, + "rewards/safe_rewards": -1.2443517446517944, + "rewards/unsafe_rewards": -1.2135089635849, + "step": 1210 + }, + { + "epoch": 0.66, + "learning_rate": 1.5912300735904248e-07, + "logits/chosen": 0.4844638705253601, + "logits/rejected": 1.7994003295898438, + "logps/chosen": -343.48394775390625, + "logps/rejected": -341.53839111328125, + "loss": 44673.1375, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2068605422973633, + "rewards/margins": 0.4373961091041565, + "rewards/rejected": -1.644256591796875, + "rewards/safe_rewards": -1.2695971727371216, + "rewards/unsafe_rewards": -1.144123911857605, + "step": 1220 + }, + { + "epoch": 0.66, + "learning_rate": 1.5476328977205395e-07, + "logits/chosen": 0.6877886652946472, + "logits/rejected": 1.9488757848739624, + "logps/chosen": -325.8306884765625, + "logps/rejected": -334.74102783203125, + "loss": 45182.2469, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.1946306228637695, + "rewards/margins": 0.5089387893676758, + "rewards/rejected": -1.7035694122314453, + "rewards/safe_rewards": -1.2559829950332642, + "rewards/unsafe_rewards": -1.1332781314849854, + "step": 1230 + }, + { + "epoch": 0.67, + "learning_rate": 1.5043719381837112e-07, + "logits/chosen": 1.0237867832183838, + "logits/rejected": 2.033402919769287, + "logps/chosen": -345.27288818359375, + "logps/rejected": -357.49346923828125, + "loss": 45143.4625, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.2465251684188843, + "rewards/margins": 0.4569992125034332, + "rewards/rejected": -1.7035243511199951, + "rewards/safe_rewards": -1.23558509349823, + "rewards/unsafe_rewards": -1.257465124130249, + "step": 1240 + }, + { + "epoch": 0.67, + "learning_rate": 1.461462467495284e-07, + "logits/chosen": 1.0214693546295166, + "logits/rejected": 2.1978230476379395, + "logps/chosen": -313.70184326171875, + "logps/rejected": -354.9341125488281, + "loss": 42228.4, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.358081579208374, + "rewards/margins": 0.5420340299606323, + "rewards/rejected": -1.9001156091690063, + "rewards/safe_rewards": -1.3530011177062988, + "rewards/unsafe_rewards": -1.3631622791290283, + "step": 1250 + }, + { + "epoch": 0.68, + "learning_rate": 1.4189196340836865e-07, + "logits/chosen": 0.6835118532180786, + "logits/rejected": 2.350911855697632, + "logps/chosen": -324.4222412109375, + "logps/rejected": -343.55059814453125, + "loss": 44373.9875, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.2565511465072632, + "rewards/margins": 0.4831761419773102, + "rewards/rejected": -1.7397273778915405, + "rewards/safe_rewards": -1.263863205909729, + "rewards/unsafe_rewards": -1.2492389678955078, + "step": 1260 + }, + { + "epoch": 0.68, + "learning_rate": 1.3767584569425561e-07, + "logits/chosen": 0.5010203123092651, + "logits/rejected": 2.0481631755828857, + "logps/chosen": -337.4039001464844, + "logps/rejected": -346.68115234375, + "loss": 44939.1125, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2905752658843994, + "rewards/margins": 0.4765418469905853, + "rewards/rejected": -1.7671171426773071, + "rewards/safe_rewards": -1.315500020980835, + "rewards/unsafe_rewards": -1.2656505107879639, + "step": 1270 + }, + { + "epoch": 0.69, + "learning_rate": 1.334993820328541e-07, + "logits/chosen": 0.7863733768463135, + "logits/rejected": 1.958449125289917, + "logps/chosen": -313.765380859375, + "logps/rejected": -344.0064392089844, + "loss": 45082.7156, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.3724980354309082, + "rewards/margins": 0.5656505823135376, + "rewards/rejected": -1.9381484985351562, + "rewards/safe_rewards": -1.337083101272583, + "rewards/unsafe_rewards": -1.4079129695892334, + "step": 1280 + }, + { + "epoch": 0.69, + "learning_rate": 1.2936404685066852e-07, + "logits/chosen": 0.7806060314178467, + "logits/rejected": 1.849461317062378, + "logps/chosen": -349.4793701171875, + "logps/rejected": -368.14093017578125, + "loss": 47371.1594, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.314326286315918, + "rewards/margins": 0.4360658526420593, + "rewards/rejected": -1.7503923177719116, + "rewards/safe_rewards": -1.3694612979888916, + "rewards/unsafe_rewards": -1.2591913938522339, + "step": 1290 + }, + { + "epoch": 0.7, + "learning_rate": 1.252713000545221e-07, + "logits/chosen": 0.3833943009376526, + "logits/rejected": 1.8072017431259155, + "logps/chosen": -341.93511962890625, + "logps/rejected": -348.7206726074219, + "loss": 43052.2219, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2315027713775635, + "rewards/margins": 0.538256049156189, + "rewards/rejected": -1.7697588205337524, + "rewards/safe_rewards": -1.2354466915130615, + "rewards/unsafe_rewards": -1.2275586128234863, + "step": 1300 + }, + { + "epoch": 0.7, + "learning_rate": 1.2122258651616304e-07, + "logits/chosen": 0.5075196027755737, + "logits/rejected": 1.9880084991455078, + "logps/chosen": -333.9472961425781, + "logps/rejected": -328.385009765625, + "loss": 45211.2688, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2371238470077515, + "rewards/margins": 0.4271668493747711, + "rewards/rejected": -1.6642907857894897, + "rewards/safe_rewards": -1.1765857934951782, + "rewards/unsafe_rewards": -1.2976620197296143, + "step": 1310 + }, + { + "epoch": 0.71, + "learning_rate": 1.1721933556217792e-07, + "logits/chosen": 0.8027510643005371, + "logits/rejected": 1.9998795986175537, + "logps/chosen": -324.43829345703125, + "logps/rejected": -347.68817138671875, + "loss": 46118.7312, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.2388060092926025, + "rewards/margins": 0.47809821367263794, + "rewards/rejected": -1.7169040441513062, + "rewards/safe_rewards": -1.2746732234954834, + "rewards/unsafe_rewards": -1.202938437461853, + "step": 1320 + }, + { + "epoch": 0.72, + "learning_rate": 1.1326296046939333e-07, + "logits/chosen": 0.8427258729934692, + "logits/rejected": 2.102285385131836, + "logps/chosen": -317.03411865234375, + "logps/rejected": -334.964111328125, + "loss": 43990.4281, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.2212368249893188, + "rewards/margins": 0.54598468542099, + "rewards/rejected": -1.767221450805664, + "rewards/safe_rewards": -1.176167368888855, + "rewards/unsafe_rewards": -1.2663061618804932, + "step": 1330 + }, + { + "epoch": 0.72, + "learning_rate": 1.0935485796594351e-07, + "logits/chosen": 0.7029412984848022, + "logits/rejected": 2.3506875038146973, + "logps/chosen": -350.2371826171875, + "logps/rejected": -353.49774169921875, + "loss": 48819.8625, + "rewards/accuracies": 0.8062499761581421, + "rewards/chosen": -1.2572548389434814, + "rewards/margins": 0.5758508443832397, + "rewards/rejected": -1.8331058025360107, + "rewards/safe_rewards": -1.2671434879302979, + "rewards/unsafe_rewards": -1.2473663091659546, + "step": 1340 + }, + { + "epoch": 0.73, + "learning_rate": 1.0549640773818028e-07, + "logits/chosen": 0.8472411036491394, + "logits/rejected": 1.7594820261001587, + "logps/chosen": -337.47723388671875, + "logps/rejected": -342.13970947265625, + "loss": 48486.0563, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": -1.3714921474456787, + "rewards/margins": 0.39531388878822327, + "rewards/rejected": -1.76680588722229, + "rewards/safe_rewards": -1.4435055255889893, + "rewards/unsafe_rewards": -1.2994787693023682, + "step": 1350 + }, + { + "epoch": 0.73, + "learning_rate": 1.0168897194359921e-07, + "logits/chosen": 0.500839352607727, + "logits/rejected": 1.7780015468597412, + "logps/chosen": -361.5278625488281, + "logps/rejected": -363.12115478515625, + "loss": 45162.4906, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3371957540512085, + "rewards/margins": 0.4367925524711609, + "rewards/rejected": -1.7739884853363037, + "rewards/safe_rewards": -1.281618356704712, + "rewards/unsafe_rewards": -1.392772912979126, + "step": 1360 + }, + { + "epoch": 0.74, + "learning_rate": 9.793389472995392e-08, + "logits/chosen": 0.7227199673652649, + "logits/rejected": 2.460563898086548, + "logps/chosen": -339.8235168457031, + "logps/rejected": -332.1888122558594, + "loss": 41779.2125, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.242737889289856, + "rewards/margins": 0.5653219223022461, + "rewards/rejected": -1.8080596923828125, + "rewards/safe_rewards": -1.2246896028518677, + "rewards/unsafe_rewards": -1.2607860565185547, + "step": 1370 + }, + { + "epoch": 0.74, + "learning_rate": 9.423250176072874e-08, + "logits/chosen": 0.5593563914299011, + "logits/rejected": 2.1021389961242676, + "logps/chosen": -324.47369384765625, + "logps/rejected": -324.517578125, + "loss": 50324.8187, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3556016683578491, + "rewards/margins": 0.4147214889526367, + "rewards/rejected": -1.7703230381011963, + "rewards/safe_rewards": -1.3182268142700195, + "rewards/unsafe_rewards": -1.3929765224456787, + "step": 1380 + }, + { + "epoch": 0.75, + "learning_rate": 9.058609974713654e-08, + "logits/chosen": 0.5155555009841919, + "logits/rejected": 2.049661159515381, + "logps/chosen": -334.1679992675781, + "logps/rejected": -354.1297607421875, + "loss": 43483.6312, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.294174313545227, + "rewards/margins": 0.5094290375709534, + "rewards/rejected": -1.8036034107208252, + "rewards/safe_rewards": -1.2831361293792725, + "rewards/unsafe_rewards": -1.3052124977111816, + "step": 1390 + }, + { + "epoch": 0.75, + "learning_rate": 8.699597598680753e-08, + "logits/chosen": 0.9858619570732117, + "logits/rejected": 2.2694084644317627, + "logps/chosen": -323.10565185546875, + "logps/rejected": -341.9316711425781, + "loss": 43113.5437, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3101509809494019, + "rewards/margins": 0.48469042778015137, + "rewards/rejected": -1.7948414087295532, + "rewards/safe_rewards": -1.291156530380249, + "rewards/unsafe_rewards": -1.3291454315185547, + "step": 1400 + }, + { + "epoch": 0.76, + "learning_rate": 8.346339790933166e-08, + "logits/chosen": 0.7411304116249084, + "logits/rejected": 2.29244065284729, + "logps/chosen": -327.34124755859375, + "logps/rejected": -342.042236328125, + "loss": 45545.1562, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.3447813987731934, + "rewards/margins": 0.5028257369995117, + "rewards/rejected": -1.8476070165634155, + "rewards/safe_rewards": -1.3410793542861938, + "rewards/unsafe_rewards": -1.348483681678772, + "step": 1410 + }, + { + "epoch": 0.76, + "learning_rate": 7.998961262881506e-08, + "logits/chosen": 0.6825073957443237, + "logits/rejected": 2.3765223026275635, + "logps/chosen": -350.10089111328125, + "logps/rejected": -340.95391845703125, + "loss": 45268.6406, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.329595923423767, + "rewards/margins": 0.4952804148197174, + "rewards/rejected": -1.8248764276504517, + "rewards/safe_rewards": -1.4233167171478271, + "rewards/unsafe_rewards": -1.2358754873275757, + "step": 1420 + }, + { + "epoch": 0.77, + "learning_rate": 7.657584650360846e-08, + "logits/chosen": 1.2880120277404785, + "logits/rejected": 2.2333691120147705, + "logps/chosen": -325.5841369628906, + "logps/rejected": -335.57281494140625, + "loss": 45864.225, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.3922796249389648, + "rewards/margins": 0.45693549513816833, + "rewards/rejected": -1.8492151498794556, + "rewards/safe_rewards": -1.438798189163208, + "rewards/unsafe_rewards": -1.3457610607147217, + "step": 1430 + }, + { + "epoch": 0.77, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": 1.0199222564697266, + "logits/rejected": 2.495116710662842, + "logps/chosen": -343.35284423828125, + "logps/rejected": -367.7138671875, + "loss": 44359.2125, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.3443058729171753, + "rewards/margins": 0.5073198080062866, + "rewards/rejected": -1.851625680923462, + "rewards/safe_rewards": -1.269716739654541, + "rewards/unsafe_rewards": -1.4188950061798096, + "step": 1440 + }, + { + "epoch": 0.78, + "learning_rate": 6.993317078356709e-08, + "logits/chosen": 1.0538650751113892, + "logits/rejected": 1.7666943073272705, + "logps/chosen": -351.0872497558594, + "logps/rejected": -347.7657775878906, + "loss": 45573.3656, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.4095313549041748, + "rewards/margins": 0.3358529508113861, + "rewards/rejected": -1.7453845739364624, + "rewards/safe_rewards": -1.4407528638839722, + "rewards/unsafe_rewards": -1.3783104419708252, + "step": 1450 + }, + { + "epoch": 0.79, + "learning_rate": 6.67066062677118e-08, + "logits/chosen": 0.713579535484314, + "logits/rejected": 2.153822422027588, + "logps/chosen": -331.4977722167969, + "logps/rejected": -329.66253662109375, + "loss": 48022.5813, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.305993676185608, + "rewards/margins": 0.4226410388946533, + "rewards/rejected": -1.7286345958709717, + "rewards/safe_rewards": -1.3051173686981201, + "rewards/unsafe_rewards": -1.3068701028823853, + "step": 1460 + }, + { + "epoch": 0.79, + "learning_rate": 6.354475023723685e-08, + "logits/chosen": 0.7345126271247864, + "logits/rejected": 2.134060859680176, + "logps/chosen": -377.17303466796875, + "logps/rejected": -377.3031311035156, + "loss": 44610.3844, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.431287169456482, + "rewards/margins": 0.5760510563850403, + "rewards/rejected": -2.007338285446167, + "rewards/safe_rewards": -1.4108716249465942, + "rewards/unsafe_rewards": -1.4517028331756592, + "step": 1470 + }, + { + "epoch": 0.8, + "learning_rate": 6.044871892939746e-08, + "logits/chosen": 0.4783990979194641, + "logits/rejected": 2.1396868228912354, + "logps/chosen": -355.8190612792969, + "logps/rejected": -373.56951904296875, + "loss": 44942.15, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -1.439534306526184, + "rewards/margins": 0.5015610456466675, + "rewards/rejected": -1.9410955905914307, + "rewards/safe_rewards": -1.472259759902954, + "rewards/unsafe_rewards": -1.4068090915679932, + "step": 1480 + }, + { + "epoch": 0.8, + "learning_rate": 5.741960534319676e-08, + "logits/chosen": 0.7640841603279114, + "logits/rejected": 1.658362627029419, + "logps/chosen": -316.4656677246094, + "logps/rejected": -339.99285888671875, + "loss": 44726.5656, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.4332436323165894, + "rewards/margins": 0.40809011459350586, + "rewards/rejected": -1.8413337469100952, + "rewards/safe_rewards": -1.5501121282577515, + "rewards/unsafe_rewards": -1.3163751363754272, + "step": 1490 + }, + { + "epoch": 0.81, + "learning_rate": 5.44584788535217e-08, + "logits/chosen": 0.8027761578559875, + "logits/rejected": 2.1775002479553223, + "logps/chosen": -358.7960510253906, + "logps/rejected": -365.4564208984375, + "loss": 42808.3344, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.4149761199951172, + "rewards/margins": 0.5123807191848755, + "rewards/rejected": -1.9273567199707031, + "rewards/safe_rewards": -1.3507754802703857, + "rewards/unsafe_rewards": -1.4791765213012695, + "step": 1500 + }, + { + "epoch": 0.81, + "eval_logits/chosen": 2.821444034576416, + "eval_logits/rejected": 4.325323581695557, + "eval_logps/chosen": -297.0653076171875, + "eval_logps/rejected": -291.0033874511719, + "eval_loss": 18052.392578125, + "eval_rewards/accuracies": 0.7414085268974304, + "eval_rewards/chosen": -1.6662639379501343, + "eval_rewards/margins": 0.31905752420425415, + "eval_rewards/rejected": -1.9853215217590332, + "eval_rewards/safe_rewards": -1.6613043546676636, + "eval_rewards/unsafe_rewards": -1.6696115732192993, + "eval_runtime": 1042.1471, + "eval_samples_per_second": 31.708, + "eval_steps_per_second": 0.991, + "step": 1500 + }, + { + "epoch": 0.81, + "learning_rate": 5.156638483361933e-08, + "logits/chosen": 0.42664867639541626, + "logits/rejected": 1.845710039138794, + "logps/chosen": -348.5888671875, + "logps/rejected": -366.8309631347656, + "loss": 44186.3688, + "rewards/accuracies": 0.793749988079071, + "rewards/chosen": -1.3070577383041382, + "rewards/margins": 0.5283392667770386, + "rewards/rejected": -1.8353971242904663, + "rewards/safe_rewards": -1.357853651046753, + "rewards/unsafe_rewards": -1.2562623023986816, + "step": 1510 + }, + { + "epoch": 0.82, + "learning_rate": 4.8744344286046236e-08, + "logits/chosen": 0.7515798807144165, + "logits/rejected": 1.9656890630722046, + "logps/chosen": -349.0513610839844, + "logps/rejected": -356.20391845703125, + "loss": 45629.8187, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.323312520980835, + "rewards/margins": 0.4178221821784973, + "rewards/rejected": -1.7411346435546875, + "rewards/safe_rewards": -1.3893558979034424, + "rewards/unsafe_rewards": -1.2572689056396484, + "step": 1520 + }, + { + "epoch": 0.82, + "learning_rate": 4.599335348222169e-08, + "logits/chosen": 0.768551230430603, + "logits/rejected": 1.736471176147461, + "logps/chosen": -357.14111328125, + "logps/rejected": -386.8569030761719, + "loss": 46133.4156, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -1.351975440979004, + "rewards/margins": 0.5045411586761475, + "rewards/rejected": -1.8565164804458618, + "rewards/safe_rewards": -1.3341138362884521, + "rewards/unsafe_rewards": -1.3698370456695557, + "step": 1530 + }, + { + "epoch": 0.83, + "learning_rate": 4.331438361071163e-08, + "logits/chosen": 0.6218854784965515, + "logits/rejected": 1.4323031902313232, + "logps/chosen": -369.9754638671875, + "logps/rejected": -378.414794921875, + "loss": 47297.1875, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.3021761178970337, + "rewards/margins": 0.4045158922672272, + "rewards/rejected": -1.7066919803619385, + "rewards/safe_rewards": -1.327862024307251, + "rewards/unsafe_rewards": -1.2764902114868164, + "step": 1540 + }, + { + "epoch": 0.83, + "learning_rate": 4.0708380434367864e-08, + "logits/chosen": 0.5318705439567566, + "logits/rejected": 1.8752968311309814, + "logps/chosen": -334.50677490234375, + "logps/rejected": -358.82440185546875, + "loss": 42945.7375, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.365651249885559, + "rewards/margins": 0.48110684752464294, + "rewards/rejected": -1.8467578887939453, + "rewards/safe_rewards": -1.4043294191360474, + "rewards/unsafe_rewards": -1.3269728422164917, + "step": 1550 + }, + { + "epoch": 0.84, + "learning_rate": 3.817626395644305e-08, + "logits/chosen": 0.4170842170715332, + "logits/rejected": 1.7395483255386353, + "logps/chosen": -327.5775146484375, + "logps/rejected": -337.7476501464844, + "loss": 47379.525, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.296163558959961, + "rewards/margins": 0.39941540360450745, + "rewards/rejected": -1.695578932762146, + "rewards/safe_rewards": -1.2791547775268555, + "rewards/unsafe_rewards": -1.3131721019744873, + "step": 1560 + }, + { + "epoch": 0.84, + "learning_rate": 3.571892809580013e-08, + "logits/chosen": 0.680249810218811, + "logits/rejected": 1.7280004024505615, + "logps/chosen": -333.6506042480469, + "logps/rejected": -345.38018798828125, + "loss": 44625.6906, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.4253218173980713, + "rewards/margins": 0.3435383141040802, + "rewards/rejected": -1.768860101699829, + "rewards/safe_rewards": -1.3657004833221436, + "rewards/unsafe_rewards": -1.4849430322647095, + "step": 1570 + }, + { + "epoch": 0.85, + "learning_rate": 3.333724037132976e-08, + "logits/chosen": 0.6499396562576294, + "logits/rejected": 1.8431791067123413, + "logps/chosen": -342.41864013671875, + "logps/rejected": -366.51055908203125, + "loss": 45061.4125, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.362977147102356, + "rewards/margins": 0.4591442942619324, + "rewards/rejected": -1.8221213817596436, + "rewards/safe_rewards": -1.3459404706954956, + "rewards/unsafe_rewards": -1.3800138235092163, + "step": 1580 + }, + { + "epoch": 0.86, + "learning_rate": 3.1032041595688506e-08, + "logits/chosen": 0.5928664207458496, + "logits/rejected": 1.9816405773162842, + "logps/chosen": -336.5494384765625, + "logps/rejected": -359.42901611328125, + "loss": 44585.0625, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.3968385457992554, + "rewards/margins": 0.48286277055740356, + "rewards/rejected": -1.8797012567520142, + "rewards/safe_rewards": -1.411312460899353, + "rewards/unsafe_rewards": -1.3823645114898682, + "step": 1590 + }, + { + "epoch": 0.86, + "learning_rate": 2.880414557846453e-08, + "logits/chosen": 0.5597755312919617, + "logits/rejected": 1.3687412738800049, + "logps/chosen": -316.55535888671875, + "logps/rejected": -333.7750244140625, + "loss": 43494.3531, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.3153984546661377, + "rewards/margins": 0.4445430636405945, + "rewards/rejected": -1.7599414587020874, + "rewards/safe_rewards": -1.300894021987915, + "rewards/unsafe_rewards": -1.32990300655365, + "step": 1600 + }, + { + "epoch": 0.87, + "learning_rate": 2.6654338838876662e-08, + "logits/chosen": 0.4141656756401062, + "logits/rejected": 2.1755805015563965, + "logps/chosen": -347.25018310546875, + "logps/rejected": -342.794189453125, + "loss": 42576.225, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.3290445804595947, + "rewards/margins": 0.5911308526992798, + "rewards/rejected": -1.920175313949585, + "rewards/safe_rewards": -1.3600118160247803, + "rewards/unsafe_rewards": -1.2980775833129883, + "step": 1610 + }, + { + "epoch": 0.87, + "learning_rate": 2.4583380328107805e-08, + "logits/chosen": 0.545110285282135, + "logits/rejected": 1.9376652240753174, + "logps/chosen": -352.3214416503906, + "logps/rejected": -352.32037353515625, + "loss": 46019.1562, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.266367793083191, + "rewards/margins": 0.5374796986579895, + "rewards/rejected": -1.803847312927246, + "rewards/safe_rewards": -1.235494613647461, + "rewards/unsafe_rewards": -1.2972408533096313, + "step": 1620 + }, + { + "epoch": 0.88, + "learning_rate": 2.259200116137039e-08, + "logits/chosen": 0.5177568793296814, + "logits/rejected": 1.600571632385254, + "logps/chosen": -357.31402587890625, + "logps/rejected": -373.8202209472656, + "loss": 46638.075, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -1.4028669595718384, + "rewards/margins": 0.40924137830734253, + "rewards/rejected": -1.8121082782745361, + "rewards/safe_rewards": -1.4030863046646118, + "rewards/unsafe_rewards": -1.402647614479065, + "step": 1630 + }, + { + "epoch": 0.88, + "learning_rate": 2.068090435979958e-08, + "logits/chosen": 0.8154770731925964, + "logits/rejected": 1.8314367532730103, + "logps/chosen": -329.49334716796875, + "logps/rejected": -338.260009765625, + "loss": 46140.8625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.236817479133606, + "rewards/margins": 0.434980571269989, + "rewards/rejected": -1.6717981100082397, + "rewards/safe_rewards": -1.2262144088745117, + "rewards/unsafe_rewards": -1.247420310974121, + "step": 1640 + }, + { + "epoch": 0.89, + "learning_rate": 1.8850764602263423e-08, + "logits/chosen": 0.7932789325714111, + "logits/rejected": 2.1938228607177734, + "logps/chosen": -328.5201416015625, + "logps/rejected": -360.6786804199219, + "loss": 44504.5969, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.3161656856536865, + "rewards/margins": 0.45560845732688904, + "rewards/rejected": -1.7717742919921875, + "rewards/safe_rewards": -1.3580942153930664, + "rewards/unsafe_rewards": -1.274237036705017, + "step": 1650 + }, + { + "epoch": 0.89, + "learning_rate": 1.710222798718028e-08, + "logits/chosen": 0.6647995710372925, + "logits/rejected": 1.6251413822174072, + "logps/chosen": -345.324462890625, + "logps/rejected": -375.11444091796875, + "loss": 43753.2188, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.3441579341888428, + "rewards/margins": 0.4422858655452728, + "rewards/rejected": -1.7864439487457275, + "rewards/safe_rewards": -1.3913052082061768, + "rewards/unsafe_rewards": -1.2970105409622192, + "step": 1660 + }, + { + "epoch": 0.9, + "learning_rate": 1.5435911804424356e-08, + "logits/chosen": 0.5311527848243713, + "logits/rejected": 1.4988657236099243, + "logps/chosen": -351.7324523925781, + "logps/rejected": -358.2347106933594, + "loss": 48001.8063, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.242690920829773, + "rewards/margins": 0.4586945176124573, + "rewards/rejected": -1.701385498046875, + "rewards/safe_rewards": -1.2917897701263428, + "rewards/unsafe_rewards": -1.1935926675796509, + "step": 1670 + }, + { + "epoch": 0.9, + "learning_rate": 1.3852404317403199e-08, + "logits/chosen": 0.5919063687324524, + "logits/rejected": 1.6568517684936523, + "logps/chosen": -313.7787780761719, + "logps/rejected": -354.34075927734375, + "loss": 45982.5312, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.268061876296997, + "rewards/margins": 0.39638254046440125, + "rewards/rejected": -1.6644443273544312, + "rewards/safe_rewards": -1.306846261024475, + "rewards/unsafe_rewards": -1.2292778491973877, + "step": 1680 + }, + { + "epoch": 0.91, + "learning_rate": 1.235226455538113e-08, + "logits/chosen": 0.5989871025085449, + "logits/rejected": 1.5718333721160889, + "logps/chosen": -334.32562255859375, + "logps/rejected": -353.8414001464844, + "loss": 47146.0125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.2909879684448242, + "rewards/margins": 0.44913721084594727, + "rewards/rejected": -1.740125060081482, + "rewards/safe_rewards": -1.291917085647583, + "rewards/unsafe_rewards": -1.2900588512420654, + "step": 1690 + }, + { + "epoch": 0.91, + "learning_rate": 1.0936022116124321e-08, + "logits/chosen": 0.4549272656440735, + "logits/rejected": 1.7380586862564087, + "logps/chosen": -323.08221435546875, + "logps/rejected": -343.3038635253906, + "loss": 43395.9313, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2304251194000244, + "rewards/margins": 0.5138837695121765, + "rewards/rejected": -1.7443090677261353, + "rewards/safe_rewards": -1.2706966400146484, + "rewards/unsafe_rewards": -1.1901538372039795, + "step": 1700 + }, + { + "epoch": 0.92, + "learning_rate": 9.60417697893534e-09, + "logits/chosen": 0.5668866038322449, + "logits/rejected": 1.5408270359039307, + "logps/chosen": -326.76031494140625, + "logps/rejected": -354.31707763671875, + "loss": 45812.6469, + "rewards/accuracies": 0.71875, + "rewards/chosen": -1.302071213722229, + "rewards/margins": 0.4234539568424225, + "rewards/rejected": -1.7255252599716187, + "rewards/safe_rewards": -1.2355064153671265, + "rewards/unsafe_rewards": -1.3686360120773315, + "step": 1710 + }, + { + "epoch": 0.93, + "learning_rate": 8.357199328144576e-09, + "logits/chosen": 0.35864904522895813, + "logits/rejected": 1.3251346349716187, + "logps/chosen": -372.8759460449219, + "logps/rejected": -383.86480712890625, + "loss": 43030.5625, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.3083076477050781, + "rewards/margins": 0.47592633962631226, + "rewards/rejected": -1.7842340469360352, + "rewards/safe_rewards": -1.3511359691619873, + "rewards/unsafe_rewards": -1.265479564666748, + "step": 1720 + }, + { + "epoch": 0.93, + "learning_rate": 7.1955293871198144e-09, + "logits/chosen": 0.8255189657211304, + "logits/rejected": 1.449049711227417, + "logps/chosen": -307.8855285644531, + "logps/rejected": -332.6614685058594, + "loss": 47112.9406, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3356918096542358, + "rewards/margins": 0.39174309372901917, + "rewards/rejected": -1.7274351119995117, + "rewards/safe_rewards": -1.3239071369171143, + "rewards/unsafe_rewards": -1.3474764823913574, + "step": 1730 + }, + { + "epoch": 0.94, + "learning_rate": 6.119577262853254e-09, + "logits/chosen": 0.6256059408187866, + "logits/rejected": 2.0787105560302734, + "logps/chosen": -316.3446350097656, + "logps/rejected": -324.9921875, + "loss": 46752.4313, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -1.2886730432510376, + "rewards/margins": 0.5023485422134399, + "rewards/rejected": -1.791021704673767, + "rewards/safe_rewards": -1.2404489517211914, + "rewards/unsafe_rewards": -1.3368970155715942, + "step": 1740 + }, + { + "epoch": 0.94, + "learning_rate": 5.129722801180542e-09, + "logits/chosen": 0.751875102519989, + "logits/rejected": 1.8661807775497437, + "logps/chosen": -334.4009704589844, + "logps/rejected": -351.52459716796875, + "loss": 41086.0375, + "rewards/accuracies": 0.706250011920929, + "rewards/chosen": -1.3123013973236084, + "rewards/margins": 0.49850529432296753, + "rewards/rejected": -1.8108066320419312, + "rewards/safe_rewards": -1.3902431726455688, + "rewards/unsafe_rewards": -1.234359622001648, + "step": 1750 + }, + { + "epoch": 0.95, + "learning_rate": 4.226315452682816e-09, + "logits/chosen": 0.6390141248703003, + "logits/rejected": 1.7730823755264282, + "logps/chosen": -322.947998046875, + "logps/rejected": -343.3359069824219, + "loss": 45289.9969, + "rewards/accuracies": 0.768750011920929, + "rewards/chosen": -1.2204147577285767, + "rewards/margins": 0.47956544160842896, + "rewards/rejected": -1.69998037815094, + "rewards/safe_rewards": -1.2491962909698486, + "rewards/unsafe_rewards": -1.1916332244873047, + "step": 1760 + }, + { + "epoch": 0.95, + "learning_rate": 3.4096741493194193e-09, + "logits/chosen": 0.521572470664978, + "logits/rejected": 1.4751794338226318, + "logps/chosen": -328.8846435546875, + "logps/rejected": -344.24456787109375, + "loss": 48485.4094, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.2770588397979736, + "rewards/margins": 0.3794560432434082, + "rewards/rejected": -1.6565147638320923, + "rewards/safe_rewards": -1.245958924293518, + "rewards/unsafe_rewards": -1.3081586360931396, + "step": 1770 + }, + { + "epoch": 0.96, + "learning_rate": 2.6800871918346846e-09, + "logits/chosen": 0.38925060629844666, + "logits/rejected": 1.8243696689605713, + "logps/chosen": -335.72637939453125, + "logps/rejected": -353.7691650390625, + "loss": 43925.4062, + "rewards/accuracies": 0.7437499761581421, + "rewards/chosen": -1.2197164297103882, + "rewards/margins": 0.5167586207389832, + "rewards/rejected": -1.7364749908447266, + "rewards/safe_rewards": -1.279204249382019, + "rewards/unsafe_rewards": -1.1602284908294678, + "step": 1780 + }, + { + "epoch": 0.96, + "learning_rate": 2.0378121479783796e-09, + "logits/chosen": 0.7704240083694458, + "logits/rejected": 2.0140128135681152, + "logps/chosen": -328.3243103027344, + "logps/rejected": -350.25848388671875, + "loss": 46946.2625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.3116792440414429, + "rewards/margins": 0.5153282880783081, + "rewards/rejected": -1.8270072937011719, + "rewards/safe_rewards": -1.2919623851776123, + "rewards/unsafe_rewards": -1.3313959836959839, + "step": 1790 + }, + { + "epoch": 0.97, + "learning_rate": 1.4830757615760247e-09, + "logits/chosen": 0.6130092144012451, + "logits/rejected": 1.9498767852783203, + "logps/chosen": -342.40252685546875, + "logps/rejected": -350.2261657714844, + "loss": 46527.5625, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -1.2705752849578857, + "rewards/margins": 0.4387969970703125, + "rewards/rejected": -1.7093721628189087, + "rewards/safe_rewards": -1.3210241794586182, + "rewards/unsafe_rewards": -1.2201263904571533, + "step": 1800 + }, + { + "epoch": 0.97, + "learning_rate": 1.0160738724809548e-09, + "logits/chosen": 0.41275280714035034, + "logits/rejected": 1.9665788412094116, + "logps/chosen": -319.31439208984375, + "logps/rejected": -350.4078674316406, + "loss": 43032.4125, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.2513651847839355, + "rewards/margins": 0.5249142646789551, + "rewards/rejected": -1.7762794494628906, + "rewards/safe_rewards": -1.2942630052566528, + "rewards/unsafe_rewards": -1.2084672451019287, + "step": 1810 + }, + { + "epoch": 0.98, + "learning_rate": 6.369713474366212e-10, + "logits/chosen": 0.6234462261199951, + "logits/rejected": 1.7797638177871704, + "logps/chosen": -360.6039123535156, + "logps/rejected": -386.88275146484375, + "loss": 42087.8531, + "rewards/accuracies": 0.7562500238418579, + "rewards/chosen": -1.3550912141799927, + "rewards/margins": 0.5382857322692871, + "rewards/rejected": -1.8933769464492798, + "rewards/safe_rewards": -1.3735519647598267, + "rewards/unsafe_rewards": -1.3366305828094482, + "step": 1820 + }, + { + "epoch": 0.98, + "learning_rate": 3.459020218731512e-10, + "logits/chosen": 0.5206863284111023, + "logits/rejected": 1.5652105808258057, + "logps/chosen": -312.03759765625, + "logps/rejected": -334.5227355957031, + "loss": 42263.0938, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -1.2279160022735596, + "rewards/margins": 0.5540409684181213, + "rewards/rejected": -1.7819570302963257, + "rewards/safe_rewards": -1.1894643306732178, + "rewards/unsafe_rewards": -1.2663676738739014, + "step": 1830 + }, + { + "epoch": 0.99, + "learning_rate": 1.429686526593088e-10, + "logits/chosen": 0.6488271355628967, + "logits/rejected": 1.6959701776504517, + "logps/chosen": -336.4909973144531, + "logps/rejected": -358.7371520996094, + "loss": 46722.6687, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -1.3301043510437012, + "rewards/margins": 0.45415645837783813, + "rewards/rejected": -1.7842607498168945, + "rewards/safe_rewards": -1.338212251663208, + "rewards/unsafe_rewards": -1.3219964504241943, + "step": 1840 + }, + { + "epoch": 1.0, + "learning_rate": 2.824288182584622e-11, + "logits/chosen": 0.33571261167526245, + "logits/rejected": 1.7277519702911377, + "logps/chosen": -341.6343688964844, + "logps/rejected": -348.6356201171875, + "loss": 43339.1687, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -1.248387098312378, + "rewards/margins": 0.4859175682067871, + "rewards/rejected": -1.734304428100586, + "rewards/safe_rewards": -1.2909047603607178, + "rewards/unsafe_rewards": -1.205869197845459, + "step": 1850 + }, + { + "epoch": 1.0, + "step": 1858, + "total_flos": 0.0, + "train_loss": 47961.330580597416, + "train_runtime": 22084.0044, + "train_samples_per_second": 2.693, + "train_steps_per_second": 0.084 + } + ], + "logging_steps": 10, + "max_steps": 1858, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}